Index: llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h =================================================================== --- llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h +++ llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h @@ -37,6 +37,8 @@ // The offset of the DWARF CFA from the incoming stack pointer. const int64_t CFAOffsetFromInitialSP = CallFrameSize; +const int64_t FPArgRegSaveAreaSize = 32; + // Maps of asm register numbers to LLVM register numbers, with 0 indicating // an invalid register. In principle we could use 32-bit and 64-bit register // classes directly, provided that we relegated the GPR allocation order Index: llvm/lib/Target/SystemZ/SystemZFrameLowering.h =================================================================== --- llvm/lib/Target/SystemZ/SystemZFrameLowering.h +++ llvm/lib/Target/SystemZ/SystemZFrameLowering.h @@ -24,8 +24,10 @@ // Override TargetFrameLowering. bool isFPCloseToIncomingSP() const override { return false; } - const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const - override; + bool + assignCalleeSavedSpillSlots(MachineFunction &MF, + const TargetRegisterInfo *TRI, + std::vector<CalleeSavedInfo> &CSI) const override; void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, @@ -43,6 +45,8 @@ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; bool hasFP(const MachineFunction &MF) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; + int getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const override; MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override; Index: llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp =================================================================== --- 
llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -47,12 +47,13 @@ SystemZFrameLowering::SystemZFrameLowering() : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(8), - -SystemZMC::CallFrameSize, Align(8), - false /* StackRealignable */) { + 0, Align(8), false /* StackRealignable */), + RegSpillOffsets(0) { // Due to the SystemZ ABI, the DWARF CFA (Canonical Frame Address) is not // equal to the incoming stack pointer, but to incoming stack pointer plus - // 160. The getOffsetOfLocalArea() returned value is interpreted as "the - // offset of the local area from the CFA". + // 160. Instead of using a Local Area Offset, the Register save area will + // be occupied by fixed frame objects, and all offsets are actually + // relative to CFA. // Create a mapping from register number to save slot offset. // These offsets are relative to the start of the register save area. @@ -62,10 +63,67 @@ SystemZMC::CallFrameSize + SpillOffsetTable[I].Offset; } -const TargetFrameLowering::SpillSlot * -SystemZFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const { - NumEntries = array_lengthof(SpillOffsetTable); - return SpillOffsetTable; +bool SystemZFrameLowering:: +assignCalleeSavedSpillSlots(MachineFunction &MF, + const TargetRegisterInfo *TRI, + std::vector<CalleeSavedInfo> &CSI) const { + SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); + MachineFrameInfo &MFFrame = MF.getFrameInfo(); + if (CSI.empty()) + return true; // Early exit if no callee saved registers are modified! + + unsigned LowGPR = 0; + unsigned HighGPR = SystemZ::R15D; + int StartAdjust = ZFI->usePackedStack(MF) ? 
SystemZMC::FPArgRegSaveAreaSize : 0; + int StartSPOffset = SystemZMC::CallFrameSize + StartAdjust; + for (auto &CS : CSI) { + unsigned Reg = CS.getReg(); + int Offset = RegSpillOffsets[Reg]; + if (Offset) { + if (SystemZ::GR64BitRegClass.contains(Reg)) { + Offset += StartAdjust; + if (StartSPOffset > Offset) { + LowGPR = Reg; + StartSPOffset = Offset; + } + } else if (ZFI->usePackedStack(MF)) { + // Wait with saving this FP Arg reg. + ZFI->incRegSaveAreaUsage(8); + CS.setFrameIdx(INT32_MAX); + continue; + } + Offset -= SystemZMC::CallFrameSize; + int FrameIdx = MFFrame.CreateFixedSpillStackObject(8, Offset); + CS.setFrameIdx(FrameIdx); + } else + CS.setFrameIdx(INT32_MAX); + } + int Offset; + if (ZFI->usePackedStack(MF)) { + unsigned GPRUsage = LowGPR ? SystemZMC::CallFrameSize - StartSPOffset : 0; + ZFI->incRegSaveAreaUsage(GPRUsage); + Offset = -GPRUsage; + } else + Offset = -SystemZMC::CallFrameSize; + + // Save the range of call-saved registers, for use by the prologue/epilogue inserters. + ZFI->setLowSavedGPR(LowGPR); + ZFI->setHighSavedGPR(HighGPR); + ZFI->setLowGPROffset(StartSPOffset); + + // Create fixed stack objects for the remaining registers. 
+ for (auto &CS : CSI) { + if (CS.getFrameIdx() != INT32_MAX) + continue; + unsigned Reg = CS.getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + unsigned Size = TRI->getSpillSize(*RC); + Offset -= Size; + int FrameIdx = MFFrame.CreateFixedSpillStackObject(Size, Offset); + CS.setFrameIdx(FrameIdx); + } + + return true; } void SystemZFrameLowering::determineCalleeSaves(MachineFunction &MF, @@ -73,6 +131,7 @@ RegScavenger *RS) const { TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); MachineFrameInfo &MFFrame = MF.getFrameInfo(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); bool HasFP = hasFP(MF); @@ -115,6 +174,17 @@ break; } } + + if (!ZFI->usePackedStack(MF)) { + // Create the Register save area regardless of how many CSRs will be saved. + int BackChainIdx = ZFI->getFramePointerSaveIndex(); + if (!BackChainIdx) { + int BackChainIdx = + MFFrame.CreateFixedObject(8, -SystemZMC::CallFrameSize, false); + ZFI->setFramePointerSaveIndex(BackChainIdx); + } + ZFI->incRegSaveAreaUsage(SystemZMC::CallFrameSize); + } } // Add GPR64 to the save instruction being built by MIB, which is in basic @@ -148,25 +218,9 @@ bool IsVarArg = MF.getFunction().isVarArg(); DebugLoc DL; - // Scan the call-saved GPRs and find the bounds of the register spill area. - unsigned LowGPR = 0; - unsigned HighGPR = SystemZ::R15D; - unsigned StartOffset = -1U; - for (unsigned I = 0, E = CSI.size(); I != E; ++I) { - unsigned Reg = CSI[I].getReg(); - if (SystemZ::GR64BitRegClass.contains(Reg)) { - unsigned Offset = RegSpillOffsets[Reg]; - assert(Offset && "Unexpected GPR save"); - if (StartOffset > Offset) { - LowGPR = Reg; - StartOffset = Offset; - } - } - } - - // Save the range of call-saved registers, for use by the epilogue inserter. 
- ZFI->setLowSavedGPR(LowGPR); - ZFI->setHighSavedGPR(HighGPR); + unsigned LowGPR = ZFI->getLowSavedGPR(); + unsigned HighGPR = ZFI->getHighSavedGPR(); + unsigned StartSPOffset = ZFI->getLowGPROffset(); // Include the GPR varargs, if any. R6D is call-saved, so would // be included by the loop above, but we also need to handle the @@ -176,8 +230,8 @@ if (FirstGPR < SystemZ::NumArgGPRs) { unsigned Reg = SystemZ::ArgGPRs[FirstGPR]; unsigned Offset = RegSpillOffsets[Reg]; - if (StartOffset > Offset) { - LowGPR = Reg; StartOffset = Offset; + if (StartSPOffset > Offset) { + LowGPR = Reg; StartSPOffset = Offset; } } } @@ -194,7 +248,7 @@ addSavedGPR(MBB, MIB, HighGPR, false); // Add the address. - MIB.addReg(SystemZ::R15D).addImm(StartOffset); + MIB.addReg(SystemZ::R15D).addImm(StartSPOffset); // Make sure all call-saved GPRs are included as operands and are // marked as live on entry. @@ -257,7 +311,7 @@ // this point might hold return values). unsigned LowGPR = ZFI->getLowSavedGPR(); unsigned HighGPR = ZFI->getHighSavedGPR(); - unsigned StartOffset = RegSpillOffsets[LowGPR]; + unsigned StartSPOffset = ZFI->getLowGPROffset(); if (LowGPR) { // If we saved any of %r2-%r5 as varargs, we should also be saving // and restoring %r6. If we're saving %r6 or above, we should be @@ -273,7 +327,7 @@ // Add the address. MIB.addReg(HasFP ? SystemZ::R11D : SystemZ::R15D); - MIB.addImm(StartOffset); + MIB.addImm(StartSPOffset); // Do a second scan adding regs as being defined by instruction for (unsigned I = 0, E = CSI.size(); I != E; ++I) { @@ -292,20 +346,18 @@ RegScavenger *RS) const { MachineFrameInfo &MFFrame = MF.getFrameInfo(); // Get the size of our stack frame to be allocated ... - uint64_t StackSize = (MFFrame.estimateStackSize(MF) + - SystemZMC::CallFrameSize); + uint64_t StackSize = MFFrame.estimateStackSize(MF); // ... and the maximum offset we may need to reach into the // caller's frame to access the save area or stack arguments. 
- int64_t MaxArgOffset = SystemZMC::CallFrameSize; + int64_t MaxArgOffset = 0; for (int I = MFFrame.getObjectIndexBegin(); I != 0; ++I) if (MFFrame.getObjectOffset(I) >= 0) { - int64_t ArgOffset = SystemZMC::CallFrameSize + - MFFrame.getObjectOffset(I) + + int64_t ArgOffset = MFFrame.getObjectOffset(I) + MFFrame.getObjectSize(I); MaxArgOffset = std::max(MaxArgOffset, ArgOffset); } - uint64_t MaxReach = StackSize + MaxArgOffset; + uint64_t MaxReach = StackSize + MaxArgOffset + SystemZMC::CallFrameSize; if (!isUInt<12>(MaxReach)) { // We may need register scavenging slots if some parts of the frame // are outside the reach of an unsigned 12-bit displacement. @@ -393,7 +445,8 @@ for (auto &Save : CSI) { unsigned Reg = Save.getReg(); if (SystemZ::GR64BitRegClass.contains(Reg)) { - int64_t Offset = SPOffsetFromCFA + RegSpillOffsets[Reg]; + int FI = Save.getFrameIdx(); + int64_t Offset = MFFrame.getObjectOffset(FI); unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION)) @@ -403,13 +456,15 @@ } uint64_t StackSize = MFFrame.getStackSize(); + bool OtherStackThanRegSaveArea = StackSize > ZFI->getRegSaveAreaUsage(); // We need to allocate the ABI-defined 160-byte base area whenever // we allocate stack space for our own use and whenever we call another // function. - if (StackSize || MFFrame.hasVarSizedObjects() || MFFrame.hasCalls()) { + if (OtherStackThanRegSaveArea || MFFrame.hasVarSizedObjects() || MFFrame.hasCalls()) StackSize += SystemZMC::CallFrameSize; - MFFrame.setStackSize(StackSize); - } + // Don't allocate the incoming reg save area. + StackSize = (StackSize > SystemZMC::CallFrameSize) ? StackSize - SystemZMC::CallFrameSize : 0; + MFFrame.setStackSize(StackSize); if (StackSize) { // Determine if we want to store a backchain. 
@@ -434,7 +489,8 @@ if (StoreBackchain) BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG)) - .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D).addImm(0).addReg(0); + .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D).addImm(0) + .addReg(0); } if (HasFP) { @@ -555,6 +611,16 @@ return true; } +int SystemZFrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, + unsigned &FrameReg) const { + // Our incoming SP is actually SystemZMC::CallFrameSize below the CFA, so + // add that difference here. + int64_t Offset = + TargetFrameLowering::getFrameIndexReference(MF, FI, FrameReg); + return Offset + SystemZMC::CallFrameSize; +} + MachineBasicBlock::iterator SystemZFrameLowering:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, Index: llvm/lib/Target/SystemZ/SystemZISelLowering.cpp =================================================================== --- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1410,7 +1410,7 @@ // ...and a similar frame index for the caller-allocated save area // that will be used to store the incoming registers. 
- int64_t RegSaveOffset = TFL->getOffsetOfLocalArea(); + int64_t RegSaveOffset = -SystemZMC::CallFrameSize; unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true); FuncInfo->setRegSaveFrameIndex(RegSaveIndex); Index: llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h =================================================================== --- llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h +++ llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h @@ -9,7 +9,9 @@ #ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINEFUNCTIONINFO_H #define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINEFUNCTIONINFO_H +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/Function.h" namespace llvm { @@ -24,12 +26,15 @@ int FramePointerSaveIndex; bool ManipulatesSP; unsigned NumLocalDynamics; + unsigned RegSaveAreaUsage; + unsigned LowGPROffset; public: explicit SystemZMachineFunctionInfo(MachineFunction &MF) : LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), VarArgsFirstFPR(0), VarArgsFrameIndex(0), RegSaveFrameIndex(0), FramePointerSaveIndex(0), - ManipulatesSP(false), NumLocalDynamics(0) {} + ManipulatesSP(false), NumLocalDynamics(0), RegSaveAreaUsage(0), + LowGPROffset(0) {} // Get and set the first call-saved GPR that should be saved and restored // by this function. This is 0 if no GPRs need to be saved or restored. @@ -71,6 +76,22 @@ // Count number of local-dynamic TLS symbols used. 
unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; } void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; } + + unsigned getRegSaveAreaUsage() const { return RegSaveAreaUsage; } + void incRegSaveAreaUsage(unsigned Inc) { RegSaveAreaUsage += Inc; } + + bool usePackedStack(MachineFunction &MF) { + bool HasPackedStackAttr = MF.getFunction().hasFnAttribute("packed-stack"); + bool IsVarArg = MF.getFunction().isVarArg(); + bool CallConv = MF.getFunction().getCallingConv() != CallingConv::GHC; + bool BackChain = MF.getFunction().hasFnAttribute("backchain"); + bool FramAddressTaken = MF.getFrameInfo().isFrameAddressTaken(); + return HasPackedStackAttr && !IsVarArg && CallConv && !BackChain && + !FramAddressTaken; + } + + void setLowGPROffset(unsigned Offs) { LowGPROffset = Offs; } + unsigned getLowGPROffset() const { return LowGPROffset; } }; } // end namespace llvm Index: llvm/test/CodeGen/SystemZ/frame-22.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/frame-22.ll @@ -0,0 +1,94 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; +; Test the packed stack layout. + +; Test spill/restore of an FPR and a GPR. +define void @f1() #0 { +; CHECK-LABEL: f1: +; CHECK: stmg %r12, %r15, 128(%r15) +; CHECK-NEXT: .cfi_offset %r12, -32 +; CHECK-NEXT: .cfi_offset %r15, -8 +; CHECK-NEXT: aghi %r15, -40 +; CHECK-NEXT: .cfi_def_cfa_offset 200 +; CHECK-NEXT: std %f8, 160(%r15) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset %f8, -40 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload +; CHECK-NEXT: lmg %r12, %r15, 168(%r15) +; CHECK-NEXT: br %r14 + call void asm sideeffect "", "~{f8},~{r12}"() nounwind + ret void +} + +; Test spill/restore with anyregcc, including an FP argument register. 
+define anyregcc void @f2() #0 { +; CHECK-LABEL: f2: +; CHECK: stmg %r3, %r15, 56(%r15) +; CHECK-NEXT: .cfi_offset %r3, -104 +; CHECK-NEXT: .cfi_offset %r15, -8 +; CHECK-NEXT: aghi %r15, -120 +; CHECK-NEXT: .cfi_def_cfa_offset 280 +; CHECK-NEXT: std %f0, 168(%r15) # 8-byte Folded Spill +; CHECK-NEXT: std %f1, 160(%r15) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset %f0, -112 +; CHECK-NEXT: .cfi_offset %f1, -120 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ld %f0, 168(%r15) # 8-byte Folded Reload +; CHECK-NEXT: ld %f1, 160(%r15) # 8-byte Folded Reload +; CHECK-NEXT: lmg %r3, %r15, 176(%r15) +; CHECK-NEXT: br %r14 + call void asm sideeffect "", "~{f0},~{f1},~{r3}"() nounwind + ret void +} + +; Test spill/restore in local area with incoming stack arguments. +define i64 @f3(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, + double %A, double %B, double %C, double %D, double %E) #0 { +; CHECK-LABEL: f3: +; CHECK: aghi %r15, -8 +; CHECK-NEXT: .cfi_def_cfa_offset 168 +; CHECK-NEXT: std %f8, 160(%r15) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset %f8, -8 +; CHECK-NEXT: ld %f0, 176(%r15) +; CHECK-NEXT: cgdbr %r2, 5, %f0 +; CHECK-NEXT: ag %r2, 168(%r15) +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload +; CHECK-NEXT: aghi %r15, 8 +; CHECK-NEXT: br %r14 + call void asm sideeffect "", "~{f8}"() nounwind + %Ei = fptosi double %E to i64 + %S = add i64 %f, %Ei + ret i64 %S +} + +; Test spill/restore in local area with outgoing stack arguments. 
+define i64 @f4() #0 { +; CHECK-LABEL: f4: +; CHECK: stmg %r6, %r15, 80(%r15) +; CHECK-NEXT: .cfi_offset %r6, -80 +; CHECK-NEXT: .cfi_offset %r14, -16 +; CHECK-NEXT: .cfi_offset %r15, -8 +; CHECK-NEXT: aghi %r15, -104 +; CHECK-NEXT: .cfi_def_cfa_offset 264 +; CHECK-NEXT: std %f8, 176(%r15) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset %f8, -88 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: llihh %r0, 16404 +; CHECK-NEXT: stg %r0, 168(%r15) +; CHECK: mvghi 160(%r15), 6 +; CHECK-NEXT: brasl %r14, f3@PLT +; CHECK-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload +; CHECK-NEXT: lmg %r6, %r15, 184(%r15) +; CHECK-NEXT: br %r14 + call void asm sideeffect "", "~{f8}"() nounwind + %C = call i64 @f3 (i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, + double 1.0, double 2.0, double 3.0, double 4.0, double 5.0) + ret i64 %C +} + +attributes #0 = { "packed-stack"="true" }