Index: llvm/lib/Target/SystemZ/SystemZFrameLowering.h
===================================================================
--- llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -24,8 +24,10 @@
   // Override TargetFrameLowering.
   bool isFPCloseToIncomingSP() const override { return false; }
-  const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const
-    override;
+  bool
+  assignCalleeSavedSpillSlots(MachineFunction &MF,
+                              const TargetRegisterInfo *TRI,
+                              std::vector<CalleeSavedInfo> &CSI) const override;
   void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
                             RegScavenger *RS) const override;
   bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
@@ -43,6 +45,8 @@
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
   bool hasFP(const MachineFunction &MF) const override;
   bool hasReservedCallFrame(const MachineFunction &MF) const override;
+  int getFrameIndexReference(const MachineFunction &MF, int FI,
+                             unsigned &FrameReg) const override;
   MachineBasicBlock::iterator
   eliminateCallFramePseudoInstr(MachineFunction &MF,
                                 MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MI) const override;
@@ -52,6 +56,9 @@
   unsigned getRegSpillOffset(unsigned Reg) const {
     return RegSpillOffsets[Reg];
   }
+
+  // Get or create the frame index of where the old frame pointer is stored.
+  int getOrCreateFramePointerSaveIndex(MachineFunction &MF) const;
 };
 } // end namespace llvm
Index: llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
===================================================================
--- llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -24,48 +24,146 @@
 // The ABI-defined register save slots, relative to the CFA (i.e.
 // incoming stack pointer + SystemZMC::CallFrameSize).
static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = { - { SystemZ::R2D, -SystemZMC::CallFrameSize + 0x10 }, - { SystemZ::R3D, -SystemZMC::CallFrameSize + 0x18 }, - { SystemZ::R4D, -SystemZMC::CallFrameSize + 0x20 }, - { SystemZ::R5D, -SystemZMC::CallFrameSize + 0x28 }, - { SystemZ::R6D, -SystemZMC::CallFrameSize + 0x30 }, - { SystemZ::R7D, -SystemZMC::CallFrameSize + 0x38 }, - { SystemZ::R8D, -SystemZMC::CallFrameSize + 0x40 }, - { SystemZ::R9D, -SystemZMC::CallFrameSize + 0x48 }, - { SystemZ::R10D, -SystemZMC::CallFrameSize + 0x50 }, - { SystemZ::R11D, -SystemZMC::CallFrameSize + 0x58 }, - { SystemZ::R12D, -SystemZMC::CallFrameSize + 0x60 }, - { SystemZ::R13D, -SystemZMC::CallFrameSize + 0x68 }, - { SystemZ::R14D, -SystemZMC::CallFrameSize + 0x70 }, - { SystemZ::R15D, -SystemZMC::CallFrameSize + 0x78 }, - { SystemZ::F0D, -SystemZMC::CallFrameSize + 0x80 }, - { SystemZ::F2D, -SystemZMC::CallFrameSize + 0x88 }, - { SystemZ::F4D, -SystemZMC::CallFrameSize + 0x90 }, - { SystemZ::F6D, -SystemZMC::CallFrameSize + 0x98 } + { SystemZ::R2D, 0x10 }, + { SystemZ::R3D, 0x18 }, + { SystemZ::R4D, 0x20 }, + { SystemZ::R5D, 0x28 }, + { SystemZ::R6D, 0x30 }, + { SystemZ::R7D, 0x38 }, + { SystemZ::R8D, 0x40 }, + { SystemZ::R9D, 0x48 }, + { SystemZ::R10D, 0x50 }, + { SystemZ::R11D, 0x58 }, + { SystemZ::R12D, 0x60 }, + { SystemZ::R13D, 0x68 }, + { SystemZ::R14D, 0x70 }, + { SystemZ::R15D, 0x78 }, + { SystemZ::F0D, 0x80 }, + { SystemZ::F2D, 0x88 }, + { SystemZ::F4D, 0x90 }, + { SystemZ::F6D, 0x98 } }; } // end anonymous namespace SystemZFrameLowering::SystemZFrameLowering() : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(8), - -SystemZMC::CallFrameSize, Align(8), - false /* StackRealignable */) { + 0, Align(8), false /* StackRealignable */), + RegSpillOffsets(0) { // Due to the SystemZ ABI, the DWARF CFA (Canonical Frame Address) is not // equal to the incoming stack pointer, but to incoming stack pointer plus - // 160. The getOffsetOfLocalArea() returned value is interpreted as "the - // offset of the local area from the CFA". + // 160. Instead of using a Local Area Offset, the Register save area will + // be occupied by fixed frame objects, and all offsets are actually + // relative to CFA. // Create a mapping from register number to save slot offset. // These offsets are relative to the start of the register save area. 
   RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
   for (unsigned I = 0, E = array_lengthof(SpillOffsetTable); I != E; ++I)
-    RegSpillOffsets[SpillOffsetTable[I].Reg] =
-      SystemZMC::CallFrameSize + SpillOffsetTable[I].Offset;
+    RegSpillOffsets[SpillOffsetTable[I].Reg] = SpillOffsetTable[I].Offset;
 }
 
-const TargetFrameLowering::SpillSlot *
-SystemZFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
-  NumEntries = array_lengthof(SpillOffsetTable);
-  return SpillOffsetTable;
+static bool usePackedStack(MachineFunction &MF) {
+  bool HasPackedStackAttr = MF.getFunction().hasFnAttribute("packed-stack");
+  bool IsVarArg = MF.getFunction().isVarArg();
+  bool CallConv = MF.getFunction().getCallingConv() != CallingConv::GHC;
+  bool BackChain = MF.getFunction().hasFnAttribute("backchain");
+  bool FrameAddressTaken = MF.getFrameInfo().isFrameAddressTaken();
+  return HasPackedStackAttr && !IsVarArg && CallConv && !BackChain &&
+         !FrameAddressTaken;
+}
+
+bool SystemZFrameLowering::
+assignCalleeSavedSpillSlots(MachineFunction &MF,
+                            const TargetRegisterInfo *TRI,
+                            std::vector<CalleeSavedInfo> &CSI) const {
+  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+  MachineFrameInfo &MFFrame = MF.getFrameInfo();
+  bool IsVarArg = MF.getFunction().isVarArg();
+  if (CSI.empty())
+    return true; // Early exit if no callee saved registers are modified!
+
+  unsigned LowGPR = 0;
+  unsigned HighGPR = SystemZ::R15D;
+  int StartSPOffset = SystemZMC::CallFrameSize;
+  int CurrOffset;
+  if (!usePackedStack(MF)) {
+    for (auto &CS : CSI) {
+      unsigned Reg = CS.getReg();
+      int Offset = RegSpillOffsets[Reg];
+      if (Offset) {
+        if (SystemZ::GR64BitRegClass.contains(Reg) && StartSPOffset > Offset) {
+          LowGPR = Reg;
+          StartSPOffset = Offset;
+        }
+        Offset -= SystemZMC::CallFrameSize;
+        int FrameIdx = MFFrame.CreateFixedSpillStackObject(8, Offset);
+        CS.setFrameIdx(FrameIdx);
+      } else
+        CS.setFrameIdx(INT32_MAX);
+    }
+
+    // Save the range of call-saved registers, for use by the
+    // prologue/epilogue inserters.
+    ZFI->setRestoreGPRRegs(LowGPR, HighGPR, StartSPOffset);
+    if (IsVarArg) {
+      // Also save the GPR varargs, if any. R6D is call-saved, so would
+      // already be included, but we also need to handle the call-clobbered
+      // argument registers.
+      unsigned FirstGPR = ZFI->getVarArgsFirstGPR();
+      if (FirstGPR < SystemZ::NumArgGPRs) {
+        unsigned Reg = SystemZ::ArgGPRs[FirstGPR];
+        int Offset = RegSpillOffsets[Reg];
+        if (StartSPOffset > Offset) {
+          LowGPR = Reg; StartSPOffset = Offset;
+        }
+      }
+    }
+    ZFI->setSpillGPRRegs(LowGPR, HighGPR, StartSPOffset);
+
+    CurrOffset = -SystemZMC::CallFrameSize;
+  } else {
+    // Packed stack: put all the GPRs at the top of the register save area.
+    uint32_t LowGR64Num = UINT32_MAX;
+    for (auto &CS : CSI) {
+      unsigned Reg = CS.getReg();
+      if (SystemZ::GR64BitRegClass.contains(Reg)) {
+        unsigned GR64Num = SystemZMC::getFirstReg(Reg);
+        int Offset = -8 * (15 - GR64Num + 1);
+        if (LowGR64Num > GR64Num) {
+          LowGR64Num = GR64Num;
+          StartSPOffset = SystemZMC::CallFrameSize + Offset;
+        }
+        int FrameIdx = MFFrame.CreateFixedSpillStackObject(8, Offset);
+        CS.setFrameIdx(FrameIdx);
+      } else
+        CS.setFrameIdx(INT32_MAX);
+    }
+    if (LowGR64Num < UINT32_MAX)
+      LowGPR = SystemZMC::GR64Regs[LowGR64Num];
+
+    // Save the range of call-saved registers, for use by the
+    // prologue/epilogue inserters.
+    ZFI->setRestoreGPRRegs(LowGPR, HighGPR, StartSPOffset);
+    ZFI->setSpillGPRRegs(LowGPR, HighGPR, StartSPOffset);
+
+    CurrOffset = LowGPR ? -(SystemZMC::CallFrameSize - StartSPOffset) : 0;
+  }
+
+  // Create fixed stack objects for the remaining registers.
+  for (auto &CS : CSI) {
+    if (CS.getFrameIdx() != INT32_MAX)
+      continue;
+    unsigned Reg = CS.getReg();
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+    unsigned Size = TRI->getSpillSize(*RC);
+    CurrOffset -= Size;
+    assert(CurrOffset % 8 == 0 &&
+           "8-byte alignment required for all register save slots");
+    int FrameIdx = MFFrame.CreateFixedSpillStackObject(Size, CurrOffset);
+    CS.setFrameIdx(FrameIdx);
+  }
+
+  return true;
 }
 
 void SystemZFrameLowering::determineCalleeSaves(MachineFunction &MF,
@@ -148,53 +246,21 @@
   bool IsVarArg = MF.getFunction().isVarArg();
   DebugLoc DL;
 
-  // Scan the call-saved GPRs and find the bounds of the register spill area.
-  unsigned LowGPR = 0;
-  unsigned HighGPR = SystemZ::R15D;
-  unsigned StartOffset = -1U;
-  for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
-    unsigned Reg = CSI[I].getReg();
-    if (SystemZ::GR64BitRegClass.contains(Reg)) {
-      unsigned Offset = RegSpillOffsets[Reg];
-      assert(Offset && "Unexpected GPR save");
-      if (StartOffset > Offset) {
-        LowGPR = Reg;
-        StartOffset = Offset;
-      }
-    }
-  }
-
-  // Save the range of call-saved registers, for use by the epilogue inserter.
-  ZFI->setLowSavedGPR(LowGPR);
-  ZFI->setHighSavedGPR(HighGPR);
-
-  // Include the GPR varargs, if any. R6D is call-saved, so would
-  // be included by the loop above, but we also need to handle the
-  // call-clobbered argument registers.
-  if (IsVarArg) {
-    unsigned FirstGPR = ZFI->getVarArgsFirstGPR();
-    if (FirstGPR < SystemZ::NumArgGPRs) {
-      unsigned Reg = SystemZ::ArgGPRs[FirstGPR];
-      unsigned Offset = RegSpillOffsets[Reg];
-      if (StartOffset > Offset) {
-        LowGPR = Reg; StartOffset = Offset;
-      }
-    }
-  }
-
   // Save GPRs
-  if (LowGPR) {
-    assert(LowGPR != HighGPR && "Should be saving %r15 and something else");
+  SystemZ::GPRRegs SpillGPRs = ZFI->getSpillGPRRegs();
+  if (SpillGPRs.LowGPR) {
+    assert(SpillGPRs.LowGPR != SpillGPRs.HighGPR &&
+           "Should be saving %r15 and something else");
 
     // Build an STMG instruction.
     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::STMG));
 
     // Add the explicit register operands.
-    addSavedGPR(MBB, MIB, LowGPR, false);
-    addSavedGPR(MBB, MIB, HighGPR, false);
+    addSavedGPR(MBB, MIB, SpillGPRs.LowGPR, false);
+    addSavedGPR(MBB, MIB, SpillGPRs.HighGPR, false);
 
     // Add the address.
-    MIB.addReg(SystemZ::R15D).addImm(StartOffset);
+    MIB.addReg(SystemZ::R15D).addImm(SpillGPRs.GPROffset);
 
     // Make sure all call-saved GPRs are included as operands and are
     // marked as live on entry.
@@ -255,30 +321,29 @@
   // Restore call-saved GPRs (but not call-clobbered varargs, which at
   // this point might hold return values).
-  unsigned LowGPR = ZFI->getLowSavedGPR();
-  unsigned HighGPR = ZFI->getHighSavedGPR();
-  unsigned StartOffset = RegSpillOffsets[LowGPR];
-  if (LowGPR) {
+  SystemZ::GPRRegs RestoreGPRs = ZFI->getRestoreGPRRegs();
+  if (RestoreGPRs.LowGPR) {
     // If we saved any of %r2-%r5 as varargs, we should also be saving
     // and restoring %r6. If we're saving %r6 or above, we should be
     // restoring it too.
-    assert(LowGPR != HighGPR && "Should be loading %r15 and something else");
+    assert(RestoreGPRs.LowGPR != RestoreGPRs.HighGPR &&
+           "Should be loading %r15 and something else");
 
     // Build an LMG instruction.
     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::LMG));
 
     // Add the explicit register operands.
- MIB.addReg(LowGPR, RegState::Define); - MIB.addReg(HighGPR, RegState::Define); + MIB.addReg(RestoreGPRs.LowGPR, RegState::Define); + MIB.addReg(RestoreGPRs.HighGPR, RegState::Define); // Add the address. MIB.addReg(HasFP ? SystemZ::R11D : SystemZ::R15D); - MIB.addImm(StartOffset); + MIB.addImm(RestoreGPRs.GPROffset); // Do a second scan adding regs as being defined by instruction for (unsigned I = 0, E = CSI.size(); I != E; ++I) { unsigned Reg = CSI[I].getReg(); - if (Reg != LowGPR && Reg != HighGPR && + if (Reg != RestoreGPRs.LowGPR && Reg != RestoreGPRs.HighGPR && SystemZ::GR64BitRegClass.contains(Reg)) MIB.addReg(Reg, RegState::ImplicitDefine); } @@ -291,16 +356,20 @@ processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const { MachineFrameInfo &MFFrame = MF.getFrameInfo(); + + if (!usePackedStack(MF)) + // Always create the full incoming register save area. + getOrCreateFramePointerSaveIndex(MF); + // Get the size of our stack frame to be allocated ... uint64_t StackSize = (MFFrame.estimateStackSize(MF) + SystemZMC::CallFrameSize); // ... and the maximum offset we may need to reach into the // caller's frame to access the save area or stack arguments. - int64_t MaxArgOffset = SystemZMC::CallFrameSize; + int64_t MaxArgOffset = 0; for (int I = MFFrame.getObjectIndexBegin(); I != 0; ++I) if (MFFrame.getObjectOffset(I) >= 0) { - int64_t ArgOffset = SystemZMC::CallFrameSize + - MFFrame.getObjectOffset(I) + + int64_t ArgOffset = MFFrame.getObjectOffset(I) + MFFrame.getObjectSize(I); MaxArgOffset = std::max(MaxArgOffset, ArgOffset); } @@ -382,7 +451,7 @@ // The current offset of the stack pointer from the CFA. int64_t SPOffsetFromCFA = -SystemZMC::CFAOffsetFromInitialSP; - if (ZFI->getLowSavedGPR()) { + if (ZFI->getSpillGPRRegs().LowGPR) { // Skip over the GPR saves. if (MBBI != MBB.end() && MBBI->getOpcode() == SystemZ::STMG) ++MBBI; @@ -393,7 +462,8 @@ for (auto &Save : CSI) { unsigned Reg = Save.getReg(); if (SystemZ::GR64BitRegClass.contains(Reg)) { - int64_t Offset = SPOffsetFromCFA + RegSpillOffsets[Reg]; + int FI = Save.getFrameIdx(); + int64_t Offset = MFFrame.getObjectOffset(FI); unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION)) @@ -406,10 +476,19 @@ // We need to allocate the ABI-defined 160-byte base area whenever // we allocate stack space for our own use and whenever we call another // function. - if (StackSize || MFFrame.hasVarSizedObjects() || MFFrame.hasCalls()) { + bool HasStackObject = false; + for (unsigned i = 0, e = MFFrame.getObjectIndexEnd(); i != e; ++i) + if (!MFFrame.isDeadObjectIndex(i)) { + HasStackObject = true; + break; + } + if (HasStackObject || MFFrame.hasCalls()) StackSize += SystemZMC::CallFrameSize; - MFFrame.setStackSize(StackSize); - } + // Don't allocate the incoming reg save area. + StackSize = StackSize > SystemZMC::CallFrameSize + ? StackSize - SystemZMC::CallFrameSize + : 0; + MFFrame.setStackSize(StackSize); if (StackSize) { // Determine if we want to store a backchain. 
@@ -434,7 +513,8 @@
 
     if (StoreBackchain)
       BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG))
-        .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D).addImm(0).addReg(0);
+        .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D).addImm(0)
+        .addReg(0);
   }
 
   if (HasFP) {
@@ -510,7 +590,7 @@
   assert(MBBI->isReturn() && "Can only insert epilogue into returning blocks");
 
   uint64_t StackSize = MFFrame.getStackSize();
-  if (ZFI->getLowSavedGPR()) {
+  if (ZFI->getRestoreGPRRegs().LowGPR) {
     --MBBI;
     unsigned Opcode = MBBI->getOpcode();
     if (Opcode != SystemZ::LMG)
@@ -555,6 +635,16 @@
   return true;
 }
 
+int SystemZFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+                                                 int FI,
+                                                 unsigned &FrameReg) const {
+  // Our incoming SP is actually SystemZMC::CallFrameSize below the CFA, so
+  // add that difference here.
+  int64_t Offset =
+      TargetFrameLowering::getFrameIndexReference(MF, FI, FrameReg);
+  return Offset + SystemZMC::CallFrameSize;
+}
+
 MachineBasicBlock::iterator SystemZFrameLowering::
 eliminateCallFramePseudoInstr(MachineFunction &MF,
                               MachineBasicBlock &MBB,
@@ -571,3 +661,15 @@
     llvm_unreachable("Unexpected call frame instruction");
   }
 }
+
+int SystemZFrameLowering::
+getOrCreateFramePointerSaveIndex(MachineFunction &MF) const {
+  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+  int FI = ZFI->getFramePointerSaveIndex();
+  if (!FI) {
+    MachineFrameInfo &MFFrame = MF.getFrameInfo();
+    FI = MFFrame.CreateFixedObject(8, -SystemZMC::CallFrameSize, false);
+    ZFI->setFramePointerSaveIndex(FI);
+  }
+  return FI;
+}
Index: llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
===================================================================
--- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1410,7 +1410,7 @@
 
   // ...and a similar frame index for the caller-allocated save area
   // that will be used to store the incoming registers.
-  int64_t RegSaveOffset = TFL->getOffsetOfLocalArea();
+  int64_t RegSaveOffset = -SystemZMC::CallFrameSize;
   unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
   FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
 
@@ -3084,14 +3084,10 @@
   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
 
-  // If the back chain frame index has not been allocated yet, do so.
-  SystemZMachineFunctionInfo *FI = MF.getInfo<SystemZMachineFunctionInfo>();
-  int BackChainIdx = FI->getFramePointerSaveIndex();
-  if (!BackChainIdx) {
-    // By definition, the frame address is the address of the back chain.
-    BackChainIdx = MFI.CreateFixedObject(8, -SystemZMC::CallFrameSize, false);
-    FI->setFramePointerSaveIndex(BackChainIdx);
-  }
+  // By definition, the frame address is the address of the back chain.
+  auto *TFL =
+      static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
+  int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
   SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
 
   // FIXME The frontend should detect this case.
Index: llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
===================================================================
--- llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -13,10 +13,22 @@
 
 namespace llvm {
 
+namespace SystemZ {
+// A struct to hold the low and high GPR registers to be saved/restored as
+// well as the offset into the register save area of the low register.
+struct GPRRegs { + unsigned LowGPR; + unsigned HighGPR; + unsigned GPROffset; + GPRRegs() : LowGPR(0), HighGPR(0), GPROffset(0) {} + }; +} + class SystemZMachineFunctionInfo : public MachineFunctionInfo { virtual void anchor(); - unsigned LowSavedGPR; - unsigned HighSavedGPR; + + SystemZ::GPRRegs SpillGPRRegs; + SystemZ::GPRRegs RestoreGPRRegs; unsigned VarArgsFirstGPR; unsigned VarArgsFirstFPR; unsigned VarArgsFrameIndex; @@ -27,19 +39,29 @@ public: explicit SystemZMachineFunctionInfo(MachineFunction &MF) - : LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), VarArgsFirstFPR(0), - VarArgsFrameIndex(0), RegSaveFrameIndex(0), FramePointerSaveIndex(0), - ManipulatesSP(false), NumLocalDynamics(0) {} - - // Get and set the first call-saved GPR that should be saved and restored - // by this function. This is 0 if no GPRs need to be saved or restored. - unsigned getLowSavedGPR() const { return LowSavedGPR; } - void setLowSavedGPR(unsigned Reg) { LowSavedGPR = Reg; } - - // Get and set the last call-saved GPR that should be saved and restored - // by this function. - unsigned getHighSavedGPR() const { return HighSavedGPR; } - void setHighSavedGPR(unsigned Reg) { HighSavedGPR = Reg; } + : VarArgsFirstGPR(0), VarArgsFirstFPR(0), VarArgsFrameIndex(0), + RegSaveFrameIndex(0), FramePointerSaveIndex(0), ManipulatesSP(false), + NumLocalDynamics(0) {} + + // Get and set the first and last call-saved GPR that should be saved by + // this function and the SP offset for the STMG. These are 0 if no GPRs + // need to be saved or restored. + SystemZ::GPRRegs getSpillGPRRegs() const { return SpillGPRRegs; } + void setSpillGPRRegs(unsigned Low, unsigned High, unsigned Offs) { + SpillGPRRegs.LowGPR = Low; + SpillGPRRegs.HighGPR = High; + SpillGPRRegs.GPROffset = Offs; + } + + // Get and set the first and last call-saved GPR that should be restored by + // this function and the SP offset for the LMG. These are 0 if no GPRs + // need to be saved or restored. + SystemZ::GPRRegs getRestoreGPRRegs() const { return RestoreGPRRegs; } + void setRestoreGPRRegs(unsigned Low, unsigned High, unsigned Offs) { + RestoreGPRRegs.LowGPR = Low; + RestoreGPRRegs.HighGPR = High; + RestoreGPRRegs.GPROffset = Offs; + } // Get and set the number of fixed (as opposed to variable) arguments // that are passed in GPRs to this function. 
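
The net effect of the SystemZFrameLowering changes above is that frame objects are now laid out relative to the CFA (the incoming %r15 plus the 160-byte call frame), the local-area offset becomes 0, and the new getFrameIndexReference() adds SystemZMC::CallFrameSize back whenever an SP-relative address is needed. A minimal standalone C++ sketch of that arithmetic follows; it is not part of the patch, the helper name is made up, and the worked numbers are taken from the f3 expectations in frame-02.ll further down.

#include <cassert>
#include <cstdint>

// SystemZMC::CallFrameSize: size of the ABI-defined register save area that
// the caller allocates below the CFA.
constexpr int64_t CallFrameSize = 160;

// Frame objects now carry CFA-relative offsets.  The generic lowering (with
// a local-area offset of 0 and no offset adjustment) yields roughly
// ObjectOffset + StackSize relative to the post-prologue stack pointer, and
// the SystemZ override adds the 160-byte bias because the incoming SP sits
// CallFrameSize below the CFA.
int64_t spRelativeOffset(int64_t CFARelativeObjectOffset, int64_t StackSize) {
  return CFARelativeObjectOffset + StackSize + CallFrameSize;
}

int main() {
  // frame-02.ll, f3: %f8 is spilled at CFA-168 with an 8-byte local frame,
  // which becomes "std %f8, 0(%r15)" after "aghi %r15, -8".
  assert(spRelativeOffset(-168, 8) == 0);
  return 0;
}
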
Index: llvm/test/CodeGen/SystemZ/anyregcc-novec.ll =================================================================== --- llvm/test/CodeGen/SystemZ/anyregcc-novec.ll +++ llvm/test/CodeGen/SystemZ/anyregcc-novec.ll @@ -5,14 +5,14 @@ entry: ;CHECK-LABEL: anyregcc1 ;CHECK: stmg %r2, %r15, 16(%r15) -;CHECK: aghi %r15, -256 -;CHECK: std %f0, 384(%r15) +;CHECK: aghi %r15, -96 +;CHECK: std %f0, 224(%r15) ;CHECK: std %f1, -;CHECK: std %f2, 392(%r15) +;CHECK: std %f2, 232(%r15) ;CHECK: std %f3, -;CHECK: std %f4, 400(%r15) +;CHECK: std %f4, 240(%r15) ;CHECK: std %f5, -;CHECK: std %f6, 408(%r15) +;CHECK: std %f6, 248(%r15) ;CHECK: std %f7, ;CHECK: std %f8, ;CHECK: std %f9, @@ -26,10 +26,10 @@ ;CHECK: .cfi_offset %f2, -24 ;CHECK: .cfi_offset %f4, -16 ;CHECK: .cfi_offset %f6, -8 -;CHECK: ld %f0, 384(%r15) -;CHECK: ld %f2, 392(%r15) -;CHECK: ld %f4, 400(%r15) -;CHECK: ld %f6, 408(%r15) +;CHECK: ld %f0, 224(%r15) +;CHECK: ld %f2, 232(%r15) +;CHECK: ld %f4, 240(%r15) +;CHECK: ld %f6, 248(%r15) call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{f0},~{f1},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15}"() nounwind ret void } Index: llvm/test/CodeGen/SystemZ/frame-02.ll =================================================================== --- llvm/test/CodeGen/SystemZ/frame-02.ll +++ llvm/test/CodeGen/SystemZ/frame-02.ll @@ -4,20 +4,20 @@ ; This function should require all FPRs, but no other spill slots. ; We need to save and restore 8 of the 16 FPRs, so the frame size -; should be exactly 160 + 8 * 8 = 224. The CFA offset is 160 -; (the caller-allocated part of the frame) + 224. +; should be exactly 8 * 8 = 64. The CFA offset is 160 +; (the caller-allocated part of the frame) + 64. define void @f1(float *%ptr) { ; CHECK-LABEL: f1: -; CHECK: aghi %r15, -224 -; CHECK: .cfi_def_cfa_offset 384 -; CHECK: std %f8, 216(%r15) -; CHECK: std %f9, 208(%r15) -; CHECK: std %f10, 200(%r15) -; CHECK: std %f11, 192(%r15) -; CHECK: std %f12, 184(%r15) -; CHECK: std %f13, 176(%r15) -; CHECK: std %f14, 168(%r15) -; CHECK: std %f15, 160(%r15) +; CHECK: aghi %r15, -64 +; CHECK: .cfi_def_cfa_offset 224 +; CHECK: std %f8, 56(%r15) +; CHECK: std %f9, 48(%r15) +; CHECK: std %f10, 40(%r15) +; CHECK: std %f11, 32(%r15) +; CHECK: std %f12, 24(%r15) +; CHECK: std %f13, 16(%r15) +; CHECK: std %f14, 8(%r15) +; CHECK: std %f15, 0(%r15) ; CHECK: .cfi_offset %f8, -168 ; CHECK: .cfi_offset %f9, -176 ; CHECK: .cfi_offset %f10, -184 @@ -27,15 +27,15 @@ ; CHECK: .cfi_offset %f14, -216 ; CHECK: .cfi_offset %f15, -224 ; ...main function body... -; CHECK: ld %f8, 216(%r15) -; CHECK: ld %f9, 208(%r15) -; CHECK: ld %f10, 200(%r15) -; CHECK: ld %f11, 192(%r15) -; CHECK: ld %f12, 184(%r15) -; CHECK: ld %f13, 176(%r15) -; CHECK: ld %f14, 168(%r15) -; CHECK: ld %f15, 160(%r15) -; CHECK: aghi %r15, 224 +; CHECK: ld %f8, 56(%r15) +; CHECK: ld %f9, 48(%r15) +; CHECK: ld %f10, 40(%r15) +; CHECK: ld %f11, 32(%r15) +; CHECK: ld %f12, 24(%r15) +; CHECK: ld %f13, 16(%r15) +; CHECK: ld %f14, 8(%r15) +; CHECK: ld %f15, 0(%r15) +; CHECK: aghi %r15, 64 ; CHECK: br %r14 %l0 = load volatile float, float *%ptr %l1 = load volatile float, float *%ptr @@ -92,15 +92,15 @@ ; so %f15 is the one that gets dropped. 
define void @f2(float *%ptr) { ; CHECK-LABEL: f2: -; CHECK: aghi %r15, -216 -; CHECK: .cfi_def_cfa_offset 376 -; CHECK: std %f8, 208(%r15) -; CHECK: std %f9, 200(%r15) -; CHECK: std %f10, 192(%r15) -; CHECK: std %f11, 184(%r15) -; CHECK: std %f12, 176(%r15) -; CHECK: std %f13, 168(%r15) -; CHECK: std %f14, 160(%r15) +; CHECK: aghi %r15, -56 +; CHECK: .cfi_def_cfa_offset 216 +; CHECK: std %f8, 48(%r15) +; CHECK: std %f9, 40(%r15) +; CHECK: std %f10, 32(%r15) +; CHECK: std %f11, 24(%r15) +; CHECK: std %f12, 16(%r15) +; CHECK: std %f13, 8(%r15) +; CHECK: std %f14, 0(%r15) ; CHECK: .cfi_offset %f8, -168 ; CHECK: .cfi_offset %f9, -176 ; CHECK: .cfi_offset %f10, -184 @@ -110,14 +110,14 @@ ; CHECK: .cfi_offset %f14, -216 ; CHECK-NOT: %f15 ; ...main function body... -; CHECK: ld %f8, 208(%r15) -; CHECK: ld %f9, 200(%r15) -; CHECK: ld %f10, 192(%r15) -; CHECK: ld %f11, 184(%r15) -; CHECK: ld %f12, 176(%r15) -; CHECK: ld %f13, 168(%r15) -; CHECK: ld %f14, 160(%r15) -; CHECK: aghi %r15, 216 +; CHECK: ld %f8, 48(%r15) +; CHECK: ld %f9, 40(%r15) +; CHECK: ld %f10, 32(%r15) +; CHECK: ld %f11, 24(%r15) +; CHECK: ld %f12, 16(%r15) +; CHECK: ld %f13, 8(%r15) +; CHECK: ld %f14, 0(%r15) +; CHECK: aghi %r15, 56 ; CHECK: br %r14 %l0 = load volatile float, float *%ptr %l1 = load volatile float, float *%ptr @@ -170,9 +170,9 @@ ; Like f1, but should require only one call-saved FPR. define void @f3(float *%ptr) { ; CHECK-LABEL: f3: -; CHECK: aghi %r15, -168 -; CHECK: .cfi_def_cfa_offset 328 -; CHECK: std %f8, 160(%r15) +; CHECK: aghi %r15, -8 +; CHECK: .cfi_def_cfa_offset 168 +; CHECK: std %f8, 0(%r15) ; CHECK: .cfi_offset %f8, -168 ; CHECK-NOT: %f9 ; CHECK-NOT: %f10 @@ -182,8 +182,8 @@ ; CHECK-NOT: %f14 ; CHECK-NOT: %f15 ; ...main function body... -; CHECK: ld %f8, 160(%r15) -; CHECK: aghi %r15, 168 +; CHECK: ld %f8, 0(%r15) +; CHECK: aghi %r15, 8 ; CHECK: br %r14 %l0 = load volatile float, float *%ptr %l1 = load volatile float, float *%ptr Index: llvm/test/CodeGen/SystemZ/frame-03.ll =================================================================== --- llvm/test/CodeGen/SystemZ/frame-03.ll +++ llvm/test/CodeGen/SystemZ/frame-03.ll @@ -6,20 +6,20 @@ ; This function should require all FPRs, but no other spill slots. ; We need to save and restore 8 of the 16 FPRs, so the frame size -; should be exactly 160 + 8 * 8 = 224. The CFA offset is 160 -; (the caller-allocated part of the frame) + 224. +; should be exactly 8 * 8 = 64. The CFA offset is 160 +; (the caller-allocated part of the frame) + 64. define void @f1(double *%ptr) { ; CHECK-LABEL: f1: -; CHECK: aghi %r15, -224 -; CHECK: .cfi_def_cfa_offset 384 -; CHECK: std %f8, 216(%r15) -; CHECK: std %f9, 208(%r15) -; CHECK: std %f10, 200(%r15) -; CHECK: std %f11, 192(%r15) -; CHECK: std %f12, 184(%r15) -; CHECK: std %f13, 176(%r15) -; CHECK: std %f14, 168(%r15) -; CHECK: std %f15, 160(%r15) +; CHECK: aghi %r15, -64 +; CHECK: .cfi_def_cfa_offset 224 +; CHECK: std %f8, 56(%r15) +; CHECK: std %f9, 48(%r15) +; CHECK: std %f10, 40(%r15) +; CHECK: std %f11, 32(%r15) +; CHECK: std %f12, 24(%r15) +; CHECK: std %f13, 16(%r15) +; CHECK: std %f14, 8(%r15) +; CHECK: std %f15, 0(%r15) ; CHECK: .cfi_offset %f8, -168 ; CHECK: .cfi_offset %f9, -176 ; CHECK: .cfi_offset %f10, -184 @@ -29,15 +29,15 @@ ; CHECK: .cfi_offset %f14, -216 ; CHECK: .cfi_offset %f15, -224 ; ...main function body... 
-; CHECK: ld %f8, 216(%r15) -; CHECK: ld %f9, 208(%r15) -; CHECK: ld %f10, 200(%r15) -; CHECK: ld %f11, 192(%r15) -; CHECK: ld %f12, 184(%r15) -; CHECK: ld %f13, 176(%r15) -; CHECK: ld %f14, 168(%r15) -; CHECK: ld %f15, 160(%r15) -; CHECK: aghi %r15, 224 +; CHECK: ld %f8, 56(%r15) +; CHECK: ld %f9, 48(%r15) +; CHECK: ld %f10, 40(%r15) +; CHECK: ld %f11, 32(%r15) +; CHECK: ld %f12, 24(%r15) +; CHECK: ld %f13, 16(%r15) +; CHECK: ld %f14, 8(%r15) +; CHECK: ld %f15, 0(%r15) +; CHECK: aghi %r15, 64 ; CHECK: br %r14 %l0 = load volatile double, double *%ptr %l1 = load volatile double, double *%ptr @@ -94,15 +94,15 @@ ; so %f15 is the one that gets dropped. define void @f2(double *%ptr) { ; CHECK-LABEL: f2: -; CHECK: aghi %r15, -216 -; CHECK: .cfi_def_cfa_offset 376 -; CHECK: std %f8, 208(%r15) -; CHECK: std %f9, 200(%r15) -; CHECK: std %f10, 192(%r15) -; CHECK: std %f11, 184(%r15) -; CHECK: std %f12, 176(%r15) -; CHECK: std %f13, 168(%r15) -; CHECK: std %f14, 160(%r15) +; CHECK: aghi %r15, -56 +; CHECK: .cfi_def_cfa_offset 216 +; CHECK: std %f8, 48(%r15) +; CHECK: std %f9, 40(%r15) +; CHECK: std %f10, 32(%r15) +; CHECK: std %f11, 24(%r15) +; CHECK: std %f12, 16(%r15) +; CHECK: std %f13, 8(%r15) +; CHECK: std %f14, 0(%r15) ; CHECK: .cfi_offset %f8, -168 ; CHECK: .cfi_offset %f9, -176 ; CHECK: .cfi_offset %f10, -184 @@ -112,14 +112,14 @@ ; CHECK: .cfi_offset %f14, -216 ; CHECK-NOT: %f15 ; ...main function body... -; CHECK: ld %f8, 208(%r15) -; CHECK: ld %f9, 200(%r15) -; CHECK: ld %f10, 192(%r15) -; CHECK: ld %f11, 184(%r15) -; CHECK: ld %f12, 176(%r15) -; CHECK: ld %f13, 168(%r15) -; CHECK: ld %f14, 160(%r15) -; CHECK: aghi %r15, 216 +; CHECK: ld %f8, 48(%r15) +; CHECK: ld %f9, 40(%r15) +; CHECK: ld %f10, 32(%r15) +; CHECK: ld %f11, 24(%r15) +; CHECK: ld %f12, 16(%r15) +; CHECK: ld %f13, 8(%r15) +; CHECK: ld %f14, 0(%r15) +; CHECK: aghi %r15, 56 ; CHECK: br %r14 %l0 = load volatile double, double *%ptr %l1 = load volatile double, double *%ptr @@ -172,9 +172,9 @@ ; Like f1, but should require only one call-saved FPR. define void @f3(double *%ptr) { ; CHECK-LABEL: f3: -; CHECK: aghi %r15, -168 -; CHECK: .cfi_def_cfa_offset 328 -; CHECK: std %f8, 160(%r15) +; CHECK: aghi %r15, -8 +; CHECK: .cfi_def_cfa_offset 168 +; CHECK: std %f8, 0(%r15) ; CHECK: .cfi_offset %f8, -168 ; CHECK-NOT: %f9 ; CHECK-NOT: %f10 @@ -184,8 +184,8 @@ ; CHECK-NOT: %f14 ; CHECK-NOT: %f15 ; ...main function body... -; CHECK: ld %f8, 160(%r15) -; CHECK: aghi %r15, 168 +; CHECK: ld %f8, 0(%r15) +; CHECK: aghi %r15, 8 ; CHECK: br %r14 %l0 = load volatile double, double *%ptr %l1 = load volatile double, double *%ptr Index: llvm/test/CodeGen/SystemZ/frame-04.ll =================================================================== --- llvm/test/CodeGen/SystemZ/frame-04.ll +++ llvm/test/CodeGen/SystemZ/frame-04.ll @@ -5,20 +5,20 @@ ; This function should require all FPRs, but no other spill slots. ; We need to save and restore 8 of the 16 FPRs, so the frame size -; should be exactly 160 + 8 * 8 = 224. The CFA offset is 160 -; (the caller-allocated part of the frame) + 224. +; should be exactly 8 * 8 = 64. The CFA offset is 160 +; (the caller-allocated part of the frame) + 64. 
define void @f1(fp128 *%ptr) { ; CHECK-LABEL: f1: -; CHECK: aghi %r15, -224 -; CHECK: .cfi_def_cfa_offset 384 -; CHECK: std %f8, 216(%r15) -; CHECK: std %f9, 208(%r15) -; CHECK: std %f10, 200(%r15) -; CHECK: std %f11, 192(%r15) -; CHECK: std %f12, 184(%r15) -; CHECK: std %f13, 176(%r15) -; CHECK: std %f14, 168(%r15) -; CHECK: std %f15, 160(%r15) +; CHECK: aghi %r15, -64 +; CHECK: .cfi_def_cfa_offset 224 +; CHECK: std %f8, 56(%r15) +; CHECK: std %f9, 48(%r15) +; CHECK: std %f10, 40(%r15) +; CHECK: std %f11, 32(%r15) +; CHECK: std %f12, 24(%r15) +; CHECK: std %f13, 16(%r15) +; CHECK: std %f14, 8(%r15) +; CHECK: std %f15, 0(%r15) ; CHECK: .cfi_offset %f8, -168 ; CHECK: .cfi_offset %f9, -176 ; CHECK: .cfi_offset %f10, -184 @@ -28,15 +28,15 @@ ; CHECK: .cfi_offset %f14, -216 ; CHECK: .cfi_offset %f15, -224 ; ...main function body... -; CHECK: ld %f8, 216(%r15) -; CHECK: ld %f9, 208(%r15) -; CHECK: ld %f10, 200(%r15) -; CHECK: ld %f11, 192(%r15) -; CHECK: ld %f12, 184(%r15) -; CHECK: ld %f13, 176(%r15) -; CHECK: ld %f14, 168(%r15) -; CHECK: ld %f15, 160(%r15) -; CHECK: aghi %r15, 224 +; CHECK: ld %f8, 56(%r15) +; CHECK: ld %f9, 48(%r15) +; CHECK: ld %f10, 40(%r15) +; CHECK: ld %f11, 32(%r15) +; CHECK: ld %f12, 24(%r15) +; CHECK: ld %f13, 16(%r15) +; CHECK: ld %f14, 8(%r15) +; CHECK: ld %f15, 0(%r15) +; CHECK: aghi %r15, 64 ; CHECK: br %r14 %l0 = load volatile fp128, fp128 *%ptr %l1 = load volatile fp128, fp128 *%ptr @@ -69,14 +69,14 @@ ; so %f13+%f15 is the pair that gets dropped. define void @f2(fp128 *%ptr) { ; CHECK-LABEL: f2: -; CHECK: aghi %r15, -208 -; CHECK: .cfi_def_cfa_offset 368 -; CHECK: std %f8, 200(%r15) -; CHECK: std %f9, 192(%r15) -; CHECK: std %f10, 184(%r15) -; CHECK: std %f11, 176(%r15) -; CHECK: std %f12, 168(%r15) -; CHECK: std %f14, 160(%r15) +; CHECK: aghi %r15, -48 +; CHECK: .cfi_def_cfa_offset 208 +; CHECK: std %f8, 40(%r15) +; CHECK: std %f9, 32(%r15) +; CHECK: std %f10, 24(%r15) +; CHECK: std %f11, 16(%r15) +; CHECK: std %f12, 8(%r15) +; CHECK: std %f14, 0(%r15) ; CHECK: .cfi_offset %f8, -168 ; CHECK: .cfi_offset %f9, -176 ; CHECK: .cfi_offset %f10, -184 @@ -86,13 +86,13 @@ ; CHECK-NOT: %f13 ; CHECK-NOT: %f15 ; ...main function body... -; CHECK: ld %f8, 200(%r15) -; CHECK: ld %f9, 192(%r15) -; CHECK: ld %f10, 184(%r15) -; CHECK: ld %f11, 176(%r15) -; CHECK: ld %f12, 168(%r15) -; CHECK: ld %f14, 160(%r15) -; CHECK: aghi %r15, 208 +; CHECK: ld %f8, 40(%r15) +; CHECK: ld %f9, 32(%r15) +; CHECK: ld %f10, 24(%r15) +; CHECK: ld %f11, 16(%r15) +; CHECK: ld %f12, 8(%r15) +; CHECK: ld %f14, 0(%r15) +; CHECK: aghi %r15, 48 ; CHECK: br %r14 %l0 = load volatile fp128, fp128 *%ptr %l1 = load volatile fp128, fp128 *%ptr @@ -122,10 +122,10 @@ ; numerical order so the pair should be %f8+%f10. define void @f3(fp128 *%ptr) { ; CHECK-LABEL: f3: -; CHECK: aghi %r15, -176 -; CHECK: .cfi_def_cfa_offset 336 -; CHECK: std %f8, 168(%r15) -; CHECK: std %f10, 160(%r15) +; CHECK: aghi %r15, -16 +; CHECK: .cfi_def_cfa_offset 176 +; CHECK: std %f8, 8(%r15) +; CHECK: std %f10, 0(%r15) ; CHECK: .cfi_offset %f8, -168 ; CHECK: .cfi_offset %f10, -176 ; CHECK-NOT: %f9 @@ -135,9 +135,9 @@ ; CHECK-NOT: %f14 ; CHECK-NOT: %f15 ; ...main function body... 
-; CHECK: ld %f8, 168(%r15) -; CHECK: ld %f10, 160(%r15) -; CHECK: aghi %r15, 176 +; CHECK: ld %f8, 8(%r15) +; CHECK: ld %f10, 0(%r15) +; CHECK: aghi %r15, 16 ; CHECK: br %r14 %l0 = load volatile fp128, fp128 *%ptr %l1 = load volatile fp128, fp128 *%ptr Index: llvm/test/CodeGen/SystemZ/frame-19.ll =================================================================== --- llvm/test/CodeGen/SystemZ/frame-19.ll +++ llvm/test/CodeGen/SystemZ/frame-19.ll @@ -96,10 +96,11 @@ ret void } -; Like f1, but no 16-byte slot should be needed. +; Like f1, but no 16-byte slot should be needed, and no outgoing reg save +; area of 160 bytes. define void @f2(<16 x i8> *%ptr) { ; CHECK-LABEL: f2: -; CHECK: aghi %r15, -224 +; CHECK: aghi %r15, -64 ; CHECK-DAG: std %f8, ; CHECK-DAG: std %f9, ; CHECK-DAG: std %f10, @@ -118,7 +119,7 @@ ; CHECK-DAG: ld %f13, ; CHECK-DAG: ld %f14, ; CHECK-DAG: ld %f15, -; CHECK: aghi %r15, 224 +; CHECK: aghi %r15, 64 ; CHECK: br %r14 %v0 = load volatile <16 x i8>, <16 x i8> *%ptr %v1 = load volatile <16 x i8>, <16 x i8> *%ptr @@ -190,7 +191,7 @@ ; Like f2, but only %f8 should be saved. define void @f3(<16 x i8> *%ptr) { ; CHECK-LABEL: f3: -; CHECK: aghi %r15, -168 +; CHECK: aghi %r15, -8 ; CHECK-DAG: std %f8, ; CHECK-NOT: vst {{.*}}(%r15) ; CHECK-NOT: vl {{.*}}(%r15) @@ -202,7 +203,7 @@ ; CHECK-NOT: %v14 ; CHECK-NOT: %v15 ; CHECK-DAG: ld %f8, -; CHECK: aghi %r15, 168 +; CHECK: aghi %r15, 8 ; CHECK: br %r14 %v0 = load volatile <16 x i8>, <16 x i8> *%ptr %v1 = load volatile <16 x i8>, <16 x i8> *%ptr Index: llvm/test/CodeGen/SystemZ/frame-20.ll =================================================================== --- llvm/test/CodeGen/SystemZ/frame-20.ll +++ llvm/test/CodeGen/SystemZ/frame-20.ll @@ -5,20 +5,20 @@ ; This function should require all FPRs, but no other spill slots. ; We need to save and restore 8 of the 16 FPRs, so the frame size -; should be exactly 160 + 8 * 8 = 224. The CFA offset is 160 -; (the caller-allocated part of the frame) + 224. +; should be exactly 8 * 8 = 64. The CFA offset is 160 +; (the caller-allocated part of the frame) + 64. 
define void @f1(double *%ptr) { ; CHECK-LABEL: f1: -; CHECK: aghi %r15, -224 -; CHECK: .cfi_def_cfa_offset 384 -; CHECK: std %f8, 216(%r15) -; CHECK: std %f9, 208(%r15) -; CHECK: std %f10, 200(%r15) -; CHECK: std %f11, 192(%r15) -; CHECK: std %f12, 184(%r15) -; CHECK: std %f13, 176(%r15) -; CHECK: std %f14, 168(%r15) -; CHECK: std %f15, 160(%r15) +; CHECK: aghi %r15, -64 +; CHECK: .cfi_def_cfa_offset 224 +; CHECK: std %f8, 56(%r15) +; CHECK: std %f9, 48(%r15) +; CHECK: std %f10, 40(%r15) +; CHECK: std %f11, 32(%r15) +; CHECK: std %f12, 24(%r15) +; CHECK: std %f13, 16(%r15) +; CHECK: std %f14, 8(%r15) +; CHECK: std %f15, 0(%r15) ; CHECK: .cfi_offset %f8, -168 ; CHECK: .cfi_offset %f9, -176 ; CHECK: .cfi_offset %f10, -184 @@ -35,15 +35,15 @@ ; CHECK-DAG: vlrepg %v23, 0(%r2) ; CHECK-DAG: vlrepg %v24, 0(%r2) ; CHECK-DAG: vlrepg %v31, 0(%r2) -; CHECK: ld %f8, 216(%r15) -; CHECK: ld %f9, 208(%r15) -; CHECK: ld %f10, 200(%r15) -; CHECK: ld %f11, 192(%r15) -; CHECK: ld %f12, 184(%r15) -; CHECK: ld %f13, 176(%r15) -; CHECK: ld %f14, 168(%r15) -; CHECK: ld %f15, 160(%r15) -; CHECK: aghi %r15, 224 +; CHECK: ld %f8, 56(%r15) +; CHECK: ld %f9, 48(%r15) +; CHECK: ld %f10, 40(%r15) +; CHECK: ld %f11, 32(%r15) +; CHECK: ld %f12, 24(%r15) +; CHECK: ld %f13, 16(%r15) +; CHECK: ld %f14, 8(%r15) +; CHECK: ld %f15, 0(%r15) +; CHECK: aghi %r15, 64 ; CHECK: br %r14 %l0 = load volatile double, double *%ptr %l1 = load volatile double, double *%ptr @@ -148,15 +148,15 @@ ; so %f15 is the one that gets dropped. define void @f2(double *%ptr) { ; CHECK-LABEL: f2: -; CHECK: aghi %r15, -216 -; CHECK: .cfi_def_cfa_offset 376 -; CHECK: std %f8, 208(%r15) -; CHECK: std %f9, 200(%r15) -; CHECK: std %f10, 192(%r15) -; CHECK: std %f11, 184(%r15) -; CHECK: std %f12, 176(%r15) -; CHECK: std %f13, 168(%r15) -; CHECK: std %f14, 160(%r15) +; CHECK: aghi %r15, -56 +; CHECK: .cfi_def_cfa_offset 216 +; CHECK: std %f8, 48(%r15) +; CHECK: std %f9, 40(%r15) +; CHECK: std %f10, 32(%r15) +; CHECK: std %f11, 24(%r15) +; CHECK: std %f12, 16(%r15) +; CHECK: std %f13, 8(%r15) +; CHECK: std %f14, 0(%r15) ; CHECK: .cfi_offset %f8, -168 ; CHECK: .cfi_offset %f9, -176 ; CHECK: .cfi_offset %f10, -184 @@ -166,14 +166,14 @@ ; CHECK: .cfi_offset %f14, -216 ; CHECK-NOT: %v15 ; CHECK-NOT: %f15 -; CHECK: ld %f8, 208(%r15) -; CHECK: ld %f9, 200(%r15) -; CHECK: ld %f10, 192(%r15) -; CHECK: ld %f11, 184(%r15) -; CHECK: ld %f12, 176(%r15) -; CHECK: ld %f13, 168(%r15) -; CHECK: ld %f14, 160(%r15) -; CHECK: aghi %r15, 216 +; CHECK: ld %f8, 48(%r15) +; CHECK: ld %f9, 40(%r15) +; CHECK: ld %f10, 32(%r15) +; CHECK: ld %f11, 24(%r15) +; CHECK: ld %f12, 16(%r15) +; CHECK: ld %f13, 8(%r15) +; CHECK: ld %f14, 0(%r15) +; CHECK: aghi %r15, 56 ; CHECK: br %r14 %l0 = load volatile double, double *%ptr %l1 = load volatile double, double *%ptr @@ -274,14 +274,14 @@ ; Like f1, but should require only one call-saved FPR. 
define void @f3(double *%ptr) { ; CHECK-LABEL: f3: -; CHECK: aghi %r15, -168 -; CHECK: .cfi_def_cfa_offset 328 -; CHECK: std %f8, 160(%r15) +; CHECK: aghi %r15, -8 +; CHECK: .cfi_def_cfa_offset 168 +; CHECK: std %f8, 0(%r15) ; CHECK: .cfi_offset %f8, -168 ; CHECK-NOT: {{%[fv]9}} ; CHECK-NOT: {{%[fv]1[0-5]}} -; CHECK: ld %f8, 160(%r15) -; CHECK: aghi %r15, 168 +; CHECK: ld %f8, 0(%r15) +; CHECK: aghi %r15, 8 ; CHECK: br %r14 %l0 = load volatile double, double *%ptr %l1 = load volatile double, double *%ptr Index: llvm/test/CodeGen/SystemZ/frame-22.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/frame-22.ll @@ -0,0 +1,87 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; +; Test the packed stack layout. + +; Test spill/restore of an FPR and a GPR. +define void @f1() #0 { +; CHECK-LABEL: f1: +; CHECK: stmg %r12, %r15, 128(%r15) +; CHECK-NEXT: .cfi_offset %r12, -32 +; CHECK-NEXT: .cfi_offset %r15, -8 +; CHECK-NEXT: std %f8, 120(%r15) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset %f8, -40 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ld %f8, 120(%r15) # 8-byte Folded Reload +; CHECK-NEXT: lmg %r12, %r15, 128(%r15) +; CHECK-NEXT: br %r14 + call void asm sideeffect "", "~{f8},~{r12}"() nounwind + ret void +} + +; Test spill/restore with anyregcc, including an FP argument register. +define anyregcc void @f2() #0 { +; CHECK-LABEL: f2: +; CHECK: stmg %r3, %r15, 56(%r15) +; CHECK-NEXT: .cfi_offset %r3, -104 +; CHECK-NEXT: .cfi_offset %r15, -8 +; CHECK-NEXT: std %f0, 48(%r15) # 8-byte Folded Spill +; CHECK-NEXT: std %f1, 40(%r15) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset %f0, -112 +; CHECK-NEXT: .cfi_offset %f1, -120 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ld %f0, 48(%r15) # 8-byte Folded Reload +; CHECK-NEXT: ld %f1, 40(%r15) # 8-byte Folded Reload +; CHECK-NEXT: lmg %r3, %r15, 56(%r15) +; CHECK-NEXT: br %r14 + call void asm sideeffect "", "~{f0},~{f1},~{r3}"() nounwind + ret void +} + +; Test spill/restore in local area with incoming stack arguments. +define i64 @f3(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, + double %A, double %B, double %C, double %D, double %E) #0 { +; CHECK-LABEL: f3: +; CHECK: std %f8, 152(%r15) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset %f8, -8 +; CHECK-NEXT: ld %f0, 168(%r15) +; CHECK-NEXT: cgdbr %r2, 5, %f0 +; CHECK-NEXT: ag %r2, 160(%r15) +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ld %f8, 152(%r15) # 8-byte Folded Reload +; CHECK-NEXT: br %r14 + call void asm sideeffect "", "~{f8}"() nounwind + %Ei = fptosi double %E to i64 + %S = add i64 %f, %Ei + ret i64 %S +} + +; Test spill/restore in local area with outgoing stack arguments. 
+define i64 @f4() #0 { +; CHECK-LABEL: f4: +; CHECK: stmg %r6, %r15, 80(%r15) +; CHECK-NEXT: .cfi_offset %r6, -80 +; CHECK-NEXT: .cfi_offset %r14, -16 +; CHECK-NEXT: .cfi_offset %r15, -8 +; CHECK-NEXT: aghi %r15, -104 +; CHECK-NEXT: .cfi_def_cfa_offset 264 +; CHECK-NEXT: std %f8, 176(%r15) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset %f8, -88 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: llihh %r0, 16404 +; CHECK-NEXT: stg %r0, 168(%r15) +; CHECK: mvghi 160(%r15), 6 +; CHECK-NEXT: brasl %r14, f3@PLT +; CHECK-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload +; CHECK-NEXT: lmg %r6, %r15, 184(%r15) +; CHECK-NEXT: br %r14 + call void asm sideeffect "", "~{f8}"() nounwind + %C = call i64 @f3 (i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, + double 1.0, double 2.0, double 3.0, double 4.0, double 5.0) + ret i64 %C +} + +attributes #0 = { "packed-stack"="true" }
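
For the packed-stack layout exercised by frame-22.ll, assignCalleeSavedSpillSlots() places the GPR save slots from the register number alone: the slot for GPR N lies 8 * (15 - N + 1) bytes below the CFA, so %r15 sits directly below the CFA and lower-numbered registers follow beneath it. A small sketch (not part of the patch; the helper name is illustrative) checks this against the f1 expectations above.

#include <cassert>
#include <cstdint>

constexpr int64_t CallFrameSize = 160; // SystemZMC::CallFrameSize

// Packed stack: saved GPR slots are pushed up against the CFA, so the slot
// for GPR number N (0..15) lies 8 * (15 - N + 1) bytes below the CFA.
int64_t packedGPRSlotOffsetFromCFA(unsigned GR64Num) {
  return -8 * (15 - static_cast<int64_t>(GR64Num) + 1);
}

int main() {
  // frame-22.ll, f1: clobbering %r12 makes %r12-%r15 the saved range; the
  // lowest slot is at CFA-32, i.e. 128(%r15) on entry, matching
  // "stmg %r12, %r15, 128(%r15)" and ".cfi_offset %r12, -32".
  assert(packedGPRSlotOffsetFromCFA(12) == -32);
  assert(packedGPRSlotOffsetFromCFA(12) + CallFrameSize == 128);
  return 0;
}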