Index: llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
===================================================================
--- llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
+++ llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -37,6 +37,8 @@
 // The offset of the DWARF CFA from the incoming stack pointer.
 const int64_t CFAOffsetFromInitialSP = CallFrameSize;
 
+const int64_t FPArgRegSaveAreaSize = 32;
+
 // Maps of asm register numbers to LLVM register numbers, with 0 indicating
 // an invalid register.  In principle we could use 32-bit and 64-bit register
 // classes directly, provided that we relegated the GPR allocation order
Index: llvm/lib/Target/SystemZ/SystemZFrameLowering.h
===================================================================
--- llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -43,6 +43,8 @@
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
   bool hasFP(const MachineFunction &MF) const override;
   bool hasReservedCallFrame(const MachineFunction &MF) const override;
+  int getFrameIndexReference(const MachineFunction &MF, int FI,
+                             unsigned &FrameReg) const override;
   MachineBasicBlock::iterator
   eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MI) const override;
Index: llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
===================================================================
--- llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -139,15 +139,20 @@
                           MachineBasicBlock::iterator MBBI,
                           const std::vector<CalleeSavedInfo> &CSI,
                           const TargetRegisterInfo *TRI) const {
-  if (CSI.empty())
-    return false;
 
   MachineFunction &MF = *MBB.getParent();
   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
   SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+  MachineFrameInfo &MFFrame = MF.getFrameInfo();
   bool IsVarArg = MF.getFunction().isVarArg();
   DebugLoc DL;
 
+  if (!ZFI->usePackedStack(MF))
+    ZFI->incRegSaveAreaUsage(SystemZMC::CallFrameSize);
+
+  if (CSI.empty())
+    return false;
+
   // Scan the call-saved GPRs and find the bounds of the register spill area.
   unsigned LowGPR = 0;
   unsigned HighGPR = SystemZ::R15D;
@@ -186,6 +191,20 @@
   if (LowGPR) {
     assert(LowGPR != HighGPR && "Should be saving %r15 and something else");
 
+    if (ZFI->usePackedStack(MF)) {
+      // Packed stack: save GPRs topmost in the Register save area.
+      StartOffset += SystemZMC::FPArgRegSaveAreaSize;
+      ZFI->incRegSaveAreaUsage(SystemZMC::CallFrameSize - StartOffset);
+      for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+        unsigned Reg = CSI[I].getReg();
+        if (SystemZ::GR64BitRegClass.contains(Reg)) {
+          int FI = CSI[I].getFrameIdx();
+          int Offset = MFFrame.getObjectOffset(FI);
+          MFFrame.setObjectOffset(FI, Offset + SystemZMC::FPArgRegSaveAreaSize);
+        }
+      }
+    }
+
     // Build an STMG instruction.
     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::STMG));
 
@@ -215,7 +234,15 @@
     unsigned Reg = CSI[I].getReg();
     if (SystemZ::FP64BitRegClass.contains(Reg)) {
       MBB.addLiveIn(Reg);
-      TII->storeRegToStackSlot(MBB, MBBI, Reg, true, CSI[I].getFrameIdx(),
+      int FI = CSI[I].getFrameIdx();
+      if (ZFI->usePackedStack(MF) && MFFrame.isFixedObjectIndex(FI)) {
+        // Packed stack: save any FP arg regs below the GPRs in the Register
+        // save area.
+        ZFI->incRegSaveAreaUsage(8);
+        int Offset = ZFI->getRegSaveAreaUsage();
+        MFFrame.setObjectOffset(FI, -Offset);
+      }
+      TII->storeRegToStackSlot(MBB, MBBI, Reg, true, FI,
                                &SystemZ::FP64BitRegClass, TRI);
     }
     if (SystemZ::VR128BitRegClass.contains(Reg)) {
@@ -258,6 +285,8 @@
   unsigned LowGPR = ZFI->getLowSavedGPR();
   unsigned HighGPR = ZFI->getHighSavedGPR();
   unsigned StartOffset = RegSpillOffsets[LowGPR];
+  if (ZFI->usePackedStack(MF))
+    StartOffset += SystemZMC::FPArgRegSaveAreaSize;
   if (LowGPR) {
     // If we saved any of %r2-%r5 as varargs, we should also be saving
     // and restoring %r6.  If we're saving %r6 or above, we should be
@@ -393,7 +422,8 @@
     for (auto &Save : CSI) {
       unsigned Reg = Save.getReg();
       if (SystemZ::GR64BitRegClass.contains(Reg)) {
-        int64_t Offset = SPOffsetFromCFA + RegSpillOffsets[Reg];
+        int FI = Save.getFrameIdx();
+        int64_t Offset = MFFrame.getObjectOffset(FI);
         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
         BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
@@ -405,9 +435,9 @@
   uint64_t StackSize = MFFrame.getStackSize();
   // We need to allocate the ABI-defined 160-byte base area whenever
   // we allocate stack space for our own use and whenever we call another
-  // function.
+  // function (this amount may be smaller with the packed stack layout).
   if (StackSize || MFFrame.hasVarSizedObjects() || MFFrame.hasCalls()) {
-    StackSize += SystemZMC::CallFrameSize;
+    StackSize += ZFI->getRegSaveAreaUsage();
     MFFrame.setStackSize(StackSize);
   }
 
@@ -434,7 +464,8 @@
 
     if (StoreBackchain)
       BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG))
-        .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D).addImm(0).addReg(0);
+        .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D).addImm(0)
+        .addReg(0);
   }
 
   if (HasFP) {
@@ -555,6 +586,23 @@
   return true;
 }
 
+int SystemZFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+                                                 int FI,
+                                                 unsigned &FrameReg) const {
+  const MachineFrameInfo &MFFrame = MF.getFrameInfo();
+  const SystemZMachineFunctionInfo *ZFI =
+      MF.getInfo<SystemZMachineFunctionInfo>();
+  // With packed stack, the local area typically begins above the incoming SP.
+  int64_t PackedAmt = MFFrame.isFixedObjectIndex(FI) ?
+    0 : SystemZMC::CallFrameSize - ZFI->getRegSaveAreaUsage();
+
+  int64_t Offset =
+      TargetFrameLowering::getFrameIndexReference(MF, FI, FrameReg);
+
+  return Offset + PackedAmt;
+}
+
+
 MachineBasicBlock::iterator SystemZFrameLowering::
 eliminateCallFramePseudoInstr(MachineFunction &MF,
                               MachineBasicBlock &MBB,
Index: llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
===================================================================
--- llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -9,7 +9,9 @@
 #ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINEFUNCTIONINFO_H
 #define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINEFUNCTIONINFO_H
 
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/Function.h"
 
 namespace llvm {
 
@@ -24,12 +26,13 @@
   int FramePointerSaveIndex;
   bool ManipulatesSP;
   unsigned NumLocalDynamics;
+  unsigned RegSaveAreaUsage;
 
 public:
   explicit SystemZMachineFunctionInfo(MachineFunction &MF)
     : LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), VarArgsFirstFPR(0),
      VarArgsFrameIndex(0), RegSaveFrameIndex(0), FramePointerSaveIndex(0),
-     ManipulatesSP(false), NumLocalDynamics(0) {}
+     ManipulatesSP(false), NumLocalDynamics(0), RegSaveAreaUsage(0) {}
 
   // Get and set the first call-saved GPR that should be saved and restored
   // by this function.  This is 0 if no GPRs need to be saved or restored.
@@ -71,6 +74,19 @@
   // Count number of local-dynamic TLS symbols used.
   unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
   void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }
+
+  unsigned getRegSaveAreaUsage() const { return RegSaveAreaUsage; }
+  void incRegSaveAreaUsage(unsigned Inc) { RegSaveAreaUsage += Inc; }
+
+  bool usePackedStack(MachineFunction &MF) {
+    bool HasPackedStackAttr = true;
+    bool IsVarArg = MF.getFunction().isVarArg();
+    bool CallConv = MF.getFunction().getCallingConv() != CallingConv::GHC;
+    bool BackChain = MF.getFunction().hasFnAttribute("backchain");
+    bool FrameAddressTaken = MF.getFrameInfo().isFrameAddressTaken();
+    return HasPackedStackAttr && !IsVarArg && CallConv && !BackChain &&
+           !FrameAddressTaken;
+  }
 };
 
 } // end namespace llvm
Index: llvm/test/CodeGen/SystemZ/frame-22.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/SystemZ/frame-22.ll
@@ -0,0 +1,92 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+;
+; Test the packed stack layout.
+
+; Test spill/restore of an FPR and a GPR.
+define void @f1() {
+; CHECK-LABEL: f1:
+; CHECK: stmg %r12, %r15, 128(%r15)
+; CHECK-NEXT: .cfi_offset %r12, -32
+; CHECK-NEXT: .cfi_offset %r15, -8
+; CHECK-NEXT: aghi %r15, -40
+; CHECK-NEXT: .cfi_def_cfa_offset 200
+; CHECK-NEXT: std %f8, 160(%r15) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset %f8, -40
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload
+; CHECK-NEXT: lmg %r12, %r15, 168(%r15)
+; CHECK-NEXT: br %r14
+  call void asm sideeffect "", "~{f8},~{r12}"() nounwind
+  ret void
+}
+
+; Test spill/restore with anyregcc, including an FP argument register.
+define anyregcc void @f2() {
+; CHECK-LABEL: f2:
+; CHECK: stmg %r3, %r15, 56(%r15)
+; CHECK-NEXT: .cfi_offset %r3, -104
+; CHECK-NEXT: .cfi_offset %r15, -8
+; CHECK-NEXT: aghi %r15, -120
+; CHECK-NEXT: .cfi_def_cfa_offset 280
+; CHECK-NEXT: std %f0, 168(%r15) # 8-byte Folded Spill
+; CHECK-NEXT: std %f1, 160(%r15) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset %f0, -112
+; CHECK-NEXT: .cfi_offset %f1, -120
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: ld %f0, 168(%r15) # 8-byte Folded Reload
+; CHECK-NEXT: ld %f1, 160(%r15) # 8-byte Folded Reload
+; CHECK-NEXT: lmg %r3, %r15, 176(%r15)
+; CHECK-NEXT: br %r14
+  call void asm sideeffect "", "~{f0},~{f1},~{r3}"() nounwind
+  ret void
+}
+
+; Test spill/restore in local area with incoming stack arguments.
+define i64 @f3(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f,
+               double %A, double %B, double %C, double %D, double %E) {
+; CHECK-LABEL: f3:
+; CHECK: aghi %r15, -8
+; CHECK-NEXT: .cfi_def_cfa_offset 168
+; CHECK-NEXT: std %f8, 160(%r15) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset %f8, -8
+; CHECK-NEXT: ld %f0, 176(%r15)
+; CHECK-NEXT: cgdbr %r2, 5, %f0
+; CHECK-NEXT: ag %r2, 168(%r15)
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload
+; CHECK-NEXT: aghi %r15, 8
+; CHECK-NEXT: br %r14
+  call void asm sideeffect "", "~{f8}"() nounwind
+  %Ei = fptosi double %E to i64
+  %S = add i64 %f, %Ei
+  ret i64 %S
+}
+
+; Test spill/restore in local area with outgoing stack arguments.
+define i64 @f4() {
+; CHECK-LABEL: f4:
+; CHECK: stmg %r6, %r15, 80(%r15)
+; CHECK-NEXT: .cfi_offset %r6, -80
+; CHECK-NEXT: .cfi_offset %r14, -16
+; CHECK-NEXT: .cfi_offset %r15, -8
+; CHECK-NEXT: aghi %r15, -104
+; CHECK-NEXT: .cfi_def_cfa_offset 264
+; CHECK-NEXT: std %f8, 176(%r15) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset %f8, -88
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: llihh %r0, 16404
+; CHECK-NEXT: stg %r0, 168(%r15)
+; CHECK: mvghi 160(%r15), 6
+; CHECK-NEXT: brasl %r14, f3@PLT
+; CHECK-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload
+; CHECK-NEXT: lmg %r6, %r15, 184(%r15)
+; CHECK-NEXT: br %r14
+  call void asm sideeffect "", "~{f8}"() nounwind
+  %C = call i64 @f3 (i64 1, i64 2, i64 3, i64 4, i64 5, i64 6,
+                     double 1.0, double 2.0, double 3.0, double 4.0, double 5.0)
+  ret i64 %C
+}