Index: llvm/lib/Target/SystemZ/SystemZCallingConv.td
===================================================================
--- llvm/lib/Target/SystemZ/SystemZCallingConv.td
+++ llvm/lib/Target/SystemZ/SystemZCallingConv.td
@@ -162,12 +162,14 @@
 //===----------------------------------------------------------------------===//
 // z/OS XPLINK64 callee-saved registers
 //===----------------------------------------------------------------------===//
-def CSR_SystemZ_XPLINK64 : CalleeSavedRegs<(add (sequence "R%dD", 8, 15),
-                                                (sequence "F%dD", 8, 15))>;
-
-def CSR_SystemZ_XPLINK64_Vector : CalleeSavedRegs<(add (sequence "R%dD", 8, 15),
-                                                       (sequence "F%dD", 15, 8),
-                                                       (sequence "V%d", 23, 16))>;
+// %R7D is volatile by the spec, but it must be saved in the prologue by
+// any non-leaf function and restored in the epilogue for use by the
+// return instruction, so it functions exactly like a callee-saved register.
+def CSR_SystemZ_XPLINK64 : CalleeSavedRegs<(add (sequence "R%dD", 7, 15),
+                                                (sequence "F%dD", 15, 8))>;
+
+def CSR_SystemZ_XPLINK64_Vector : CalleeSavedRegs<(add CSR_SystemZ_XPLINK64,
+                                                       (sequence "V%d", 23, 16))>;
 
 //===----------------------------------------------------------------------===//
 // z/OS XPLINK64 return value calling convention
Index: llvm/lib/Target/SystemZ/SystemZFrameLowering.h
===================================================================
--- llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -10,6 +10,8 @@
 #define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZFRAMELOWERING_H
 
 #include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "SystemZInstrBuilder.h"
+#include "SystemZMachineFunctionInfo.h"
 #include "llvm/ADT/IndexedMap.h"
 #include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/Support/TypeSize.h"
@@ -20,10 +22,14 @@
 
 class SystemZFrameLowering : public TargetFrameLowering {
+protected:
+  IndexedMap<unsigned> RegSpillOffsets;
+
 public:
   SystemZFrameLowering(StackDirection D, Align StackAl, int LAO, Align TransAl,
                        bool StackReal);
+
   static std::unique_ptr<SystemZFrameLowering>
   create(const SystemZSubtarget &STI);
@@ -36,7 +42,6 @@
 };
 
 class SystemZELFFrameLowering : public SystemZFrameLowering {
-  IndexedMap<unsigned> RegSpillOffsets;
 
 public:
   SystemZELFFrameLowering();
@@ -86,9 +91,52 @@
 };
 
 class SystemZXPLINKFrameLowering : public SystemZFrameLowering {
+  /// Issues as many AGHI/AGFI instructions as necessary to add NumBytes
+  /// to the value held in the 64-bit register Reg.
+  /// \returns a pointer to the final instruction created.
+  MachineInstr *emitStackIncrement(MachineFunction &MF, MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator &MBBI,
+                                   const DebugLoc &DL, unsigned Reg,
+                                   int64_t NumBytes, const TargetInstrInfo *TII,
+                                   bool InPrologue = false) const;
+
+  bool isXPLeafCandidate(const MachineFunction &MF) const;
+
+  /// \returns true if this frame is an XPLINK64 leaf frame
+  /// as defined in the following document:
+  /// A High Performance Linkage for z/OS (XPLink),
+  /// Version 5.1.0, 04 August 2004
+  bool isXPLINKLeaf(const MachineFunction &MF) const;
+
 public:
   SystemZXPLINKFrameLowering();
 
+  static void addAllSavedGPRs(MachineBasicBlock &MBB, MachineInstrBuilder &MIB,
+                              const std::vector<CalleeSavedInfo> &CSI,
+                              SystemZMachineFunctionInfo *ZFI);
+
+  bool
+  assignCalleeSavedSpillSlots(MachineFunction &MF,
+                              const TargetRegisterInfo *TRI,
+                              std::vector<CalleeSavedInfo> &CSI) const override;
+
+  void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+                            RegScavenger *RS) const override;
+
+  bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MBBI,
+                                 ArrayRef<CalleeSavedInfo> CSI,
+                                 const TargetRegisterInfo *TRI) const override;
+
+  /// \returns the number of bytes in the callee-allocated part of the frame.
+  uint64_t getAllocatedStackSize(const MachineFunction &MF) const;
+
+  bool
+  restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator MBBI,
+                              MutableArrayRef<CalleeSavedInfo> CSI,
+                              const TargetRegisterInfo *TRI) const override;
+
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
 
   void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
Index: llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
===================================================================
--- llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -24,7 +24,7 @@
 namespace {
 // The ABI-defined register save slots, relative to the CFA (i.e.
 // incoming stack pointer + SystemZMC::ELFCallFrameSize).
-static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = {
+static const TargetFrameLowering::SpillSlot ELFSpillOffsetTable[] = {
   { SystemZ::R2D, 0x10 },
   { SystemZ::R3D, 0x18 },
   { SystemZ::R4D, 0x20 },
@@ -44,12 +44,28 @@
   { SystemZ::F4D, 0x90 },
   { SystemZ::F6D, 0x98 }
 };
+
+static const TargetFrameLowering::SpillSlot XPLINKSpillOffsetTable[] = {
+    {SystemZ::R4D, 0x00},
+    {SystemZ::R5D, 0x08},
+    {SystemZ::R6D, 0x10},
+    {SystemZ::R7D, 0x18},
+    {SystemZ::R8D, 0x20},
+    {SystemZ::R9D, 0x28},
+    {SystemZ::R10D, 0x30},
+    {SystemZ::R11D, 0x38},
+    {SystemZ::R12D, 0x40},
+    {SystemZ::R13D, 0x48},
+    {SystemZ::R14D, 0x50},
+    {SystemZ::R15D, 0x58}
+};
 } // end anonymous namespace
 
 SystemZFrameLowering::SystemZFrameLowering(StackDirection D, Align StackAl,
                                            int LAO, Align TransAl,
                                            bool StackReal)
-    : TargetFrameLowering(D, StackAl, LAO, TransAl, StackReal) {}
+    : TargetFrameLowering(D, StackAl, LAO, TransAl, StackReal),
+      RegSpillOffsets(0) {}
 
 std::unique_ptr<SystemZFrameLowering>
 SystemZFrameLowering::create(const SystemZSubtarget &STI) {
@@ -76,7 +92,7 @@
 
 bool SystemZFrameLowering::hasReservedCallFrame(
     const MachineFunction &MF) const {
-  // The ELF ABI requires us to allocate 160 bytes of stack space for the
+  // The ELF ABI requires us to allocate 160 bytes of stack space for the
   // callee, with any outgoing stack arguments being placed above that.  It
   // seems better to make that area a permanent feature of the frame even if
   // we're using a frame pointer.  Similarly, 64-bit XPLINK requires 96 bytes
@@ -200,9 +216,8 @@
 }
 
 SystemZELFFrameLowering::SystemZELFFrameLowering()
-    : SystemZFrameLowering(TargetFrameLowering::StackGrowsDown, Align(8), 0,
-                           Align(8), false /* StackRealignable */),
-      RegSpillOffsets(0) {
+    : SystemZFrameLowering(TargetFrameLowering::StackGrowsDown, Align(8),
+                           0, Align(8), /* StackRealignable */ false) {
   // Due to the SystemZ ABI, the DWARF CFA (Canonical Frame Address) is not
   // equal to the incoming stack pointer, but to incoming stack pointer plus
   // 160.  Instead of using a Local Area Offset, the Register save area will
@@ -212,8 +227,8 @@
   // Create a mapping from register number to save slot offset.
   // These offsets are relative to the start of the register save area.
   RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
-  for (unsigned I = 0, E = array_lengthof(SpillOffsetTable); I != E; ++I)
-    RegSpillOffsets[SpillOffsetTable[I].Reg] = SpillOffsetTable[I].Offset;
+  for (unsigned I = 0, E = array_lengthof(ELFSpillOffsetTable); I != E; ++I)
+    RegSpillOffsets[ELFSpillOffsetTable[I].Reg] = ELFSpillOffsetTable[I].Offset;
 }
 
 // Add GPR64 to the save instruction being built by MIB, which is in basic
@@ -307,7 +322,7 @@
 
   // Restore FPRs/VRs in the normal TargetInstrInfo way.
   for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
-    unsigned Reg = CSI[I].getReg();
+    unsigned Reg = CSI[I].getReg();
     if (SystemZ::FP64BitRegClass.contains(Reg))
       TII->loadRegFromStackSlot(MBB, MBBI, Reg, CSI[I].getFrameIdx(),
                                 &SystemZ::FP64BitRegClass, TRI);
@@ -811,11 +826,489 @@
 }
 
 SystemZXPLINKFrameLowering::SystemZXPLINKFrameLowering()
-    : SystemZFrameLowering(TargetFrameLowering::StackGrowsUp, Align(32), 128,
-                           Align(32), false /* StackRealignable */) {}
+    : SystemZFrameLowering(TargetFrameLowering::StackGrowsUp, Align(32),
+                           128, Align(32), /* StackRealignable */ false) {
+
+  // Create a mapping from register number to save slot offset.
+  // These offsets are relative to the start of the local area.
+  // Since 0 is a legal offset, the values are stored as bit-negated values.
+  RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
+  for (unsigned I = 0, E = array_lengthof(XPLINKSpillOffsetTable); I != E; ++I)
+    RegSpillOffsets[XPLINKSpillOffsetTable[I].Reg] =
+        ~XPLINKSpillOffsetTable[I].Offset;
+}
+
+bool SystemZXPLINKFrameLowering::isXPLeafCandidate(
+    const MachineFunction &MF) const {
+  const MachineFrameInfo &MFFrame = MF.getFrameInfo();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+  auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+
+  // If the function calls other functions (including alloca),
+  // it is not an XPLINK leaf routine.
+  if (MFFrame.hasCalls())
+    return false;
+
+  // If the function has variable-sized objects,
+  // it is not an XPLINK leaf routine.
+  if (MFFrame.hasVarSizedObjects())
+    return false;
+
+  // If the function adjusts the stack,
+  // it is not an XPLINK leaf routine.
+  if (MFFrame.adjustsStack())
+    return false;
+
+  // If the function modifies the stack pointer register,
+  // it is not an XPLINK leaf routine.
+  if (MRI.isPhysRegModified(Regs.getStackPointerRegister()))
+    return false;
+
+  // If the function modifies the ADA register,
+  // it is not an XPLINK leaf routine.
+  if (MRI.isPhysRegModified(Regs.getAddressOfCalleeRegister()))
+    return false;
+
+  // If the function modifies the return address register,
+  // it is not an XPLINK leaf routine.
+  if (MRI.isPhysRegModified(Regs.getReturnFunctionAddressRegister()))
+    return false;
+
+  return true;
+}
+
+bool SystemZXPLINKFrameLowering::isXPLINKLeaf(const MachineFunction &MF) const {
+  const MachineFrameInfo &MFFrame = MF.getFrameInfo();
+
+  if (!isXPLeafCandidate(MF))
+    return false;
+
+  return MFFrame.getStackSize() == 0 && MFFrame.estimateStackSize(MF) == 0;
+}
+
+bool SystemZXPLINKFrameLowering::assignCalleeSavedSpillSlots(
+    MachineFunction &MF, const TargetRegisterInfo *TRI,
+    std::vector<CalleeSavedInfo> &CSI) const {
+  MachineFrameInfo &MFFrame = MF.getFrameInfo();
+  SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
+  const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+  auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+  bool IsLeaf = isXPLeafCandidate(MF) && CSI.empty();
+
+  if (IsLeaf)
+    return true;
+
+  // Scan the call-saved GPRs and find the bounds of the register spill area.
+  unsigned LowGPR = 0;
+  int LowOffset = INT32_MAX;
+  unsigned HighGPR = LowGPR;
+  int HighOffset = -1;
+
+  unsigned RegSP = Regs.getStackPointerRegister();
+  auto &GRRegClass = SystemZ::GR64BitRegClass;
+  const unsigned RegSize = 8;
+
+  auto ProcessCSI = [&](std::vector<CalleeSavedInfo> &CSIList) {
+    for (auto &CS : CSIList) {
+      unsigned Reg = CS.getReg();
+      int Offset = ~RegSpillOffsets[Reg];
+      if (Offset != -1) {
+        if (GRRegClass.contains(Reg)) {
+          if (LowOffset > Offset) {
+            LowOffset = Offset;
+            LowGPR = Reg;
+          }
+
+          if (Offset > HighOffset) {
+            HighOffset = Offset;
+            HighGPR = Reg;
+          }
+        }
+        int FrameIdx = MFFrame.CreateFixedSpillStackObject(RegSize, Offset);
+        CS.setFrameIdx(FrameIdx);
+      } else
+        CS.setFrameIdx(INT32_MAX);
+    }
+  };
+
+  std::vector<CalleeSavedInfo> Spills;
+
+  // For non-leaf functions:
+  // - the address of callee (entry point) register R6 must be saved
+  Spills.push_back(CalleeSavedInfo(Regs.getAddressOfCalleeRegister()));
+
+  // If the function needs a frame pointer, or if the backchain pointer should
+  // be stored, then save the stack pointer register R4.
+  if (hasFP(MF) || MF.getFunction().hasFnAttribute("backchain"))
+    Spills.push_back(CalleeSavedInfo(RegSP));
+
+  // Record the range of call-saved GPRs to be restored, for use by the
+  // epilogue inserter.
+  ProcessCSI(CSI);
+  MFI->setRestoreGPRRegs(LowGPR, HighGPR, LowOffset);
+
+  // Record the range of GPRs to be spilled, including the extra registers
+  // above, for use by the prologue inserter.
+  ProcessCSI(Spills);
+  MFI->setSpillGPRRegs(LowGPR, HighGPR, LowOffset);
+
+  // Create spill slots for the remaining registers.
+  for (auto &CS : CSI) {
+    if (CS.getFrameIdx() != INT32_MAX)
+      continue;
+    unsigned Reg = CS.getReg();
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+    Align Alignment = TRI->getSpillAlign(*RC);
+    unsigned Size = TRI->getSpillSize(*RC);
+    Alignment = std::min(Alignment, getStackAlign());
+    int FrameIdx = MFFrame.CreateStackObject(Size, Alignment, true);
+    CS.setFrameIdx(FrameIdx);
+  }
+
+  return true;
+}
+
+void SystemZXPLINKFrameLowering::determineCalleeSaves(MachineFunction &MF,
+                                                      BitVector &SavedRegs,
+                                                      RegScavenger *RS) const {
+  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
+
+  bool HasFP = hasFP(MF);
+  const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+  auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+
+  // If the function requires a frame pointer, record that the hard
+  // frame pointer will be clobbered.
+  if (HasFP)
+    SavedRegs.set(Regs.getFramePointerRegister());
+
+  // If any registers need to be saved, then the function is not a leaf
+  // function, as it will need a stack frame to save those registers.
+  bool IsLeaf = isXPLeafCandidate(MF) && SavedRegs.none();
+
+  // If the function is not an XPLINK leaf function, we need to save the
+  // return address register.  We also always use that register for the
+  // return instruction, so it needs to be restored in the epilogue even
+  // though it is considered to be volatile.
+  if (!IsLeaf)
+    SavedRegs.set(Regs.getReturnFunctionAddressRegister());
+}
+
+void SystemZXPLINKFrameLowering::addAllSavedGPRs(
+    MachineBasicBlock &MBB, MachineInstrBuilder &MIB,
+    const std::vector<CalleeSavedInfo> &CSI,
+    SystemZMachineFunctionInfo *ZFI) {
+  // Make sure all call-saved GPRs are included as operands and are
+  // marked as live on entry.
+  auto &GRRegClass = SystemZ::GR64BitRegClass;
+  for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+    unsigned Reg = CSI[I].getReg();
+    if (GRRegClass.contains(Reg))
+      addSavedGPR(MBB, MIB, Reg, true);
+  }
+}
+
+bool SystemZXPLINKFrameLowering::spillCalleeSavedRegisters(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return true;
+
+  MachineFunction &MF = *MBB.getParent();
+  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+  const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+  SystemZ::GPRRegs SpillGPRs = ZFI->getSpillGPRRegs();
+  DebugLoc DL;
+
+  // Save GPRs
+  if (SpillGPRs.LowGPR) {
+    assert(SpillGPRs.LowGPR != SpillGPRs.HighGPR &&
+           "Should be saving multiple registers");
+
+    // Build an STMG instruction.
+    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::STMG));
+
+    // Add the explicit register operands.
+    addSavedGPR(MBB, MIB, SpillGPRs.LowGPR, false);
+    addSavedGPR(MBB, MIB, SpillGPRs.HighGPR, false);
+
+    // Add the address (r4).
+    MIB.addReg(Regs.getStackPointerRegister());
+
+    // Add a partial offset only; we cannot add the actual offset yet, as
+    // the stack frame is not finalized at this point.
+    MIB.addImm(SpillGPRs.GPROffset);
+
+    // Make sure all call-saved GPRs are included as operands and are
+    // marked as live on entry.
+    auto &GRRegClass = SystemZ::GR64BitRegClass;
+    for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+      unsigned Reg = CSI[I].getReg();
+      if (GRRegClass.contains(Reg))
+        addSavedGPR(MBB, MIB, Reg, true);
+    }
+  }
+
+  // Spill FPRs/VRs to the stack in the normal TargetInstrInfo way.
+  for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+    unsigned Reg = CSI[I].getReg();
+    if (SystemZ::FP64BitRegClass.contains(Reg)) {
+      MBB.addLiveIn(Reg);
+      TII->storeRegToStackSlot(MBB, MBBI, Reg, true, CSI[I].getFrameIdx(),
+                               &SystemZ::FP64BitRegClass, TRI);
+    }
+    if (SystemZ::VR128BitRegClass.contains(Reg)) {
+      MBB.addLiveIn(Reg);
+      TII->storeRegToStackSlot(MBB, MBBI, Reg, true, CSI[I].getFrameIdx(),
+                               &SystemZ::VR128BitRegClass, TRI);
+    }
+  }
+
+  return true;
+}
+
+// Emit instructions before MBBI (in MBB) to add NumBytes to Reg.
+MachineInstr *SystemZXPLINKFrameLowering::emitStackIncrement(
+    MachineFunction &MF, MachineBasicBlock &MBB,
+    MachineBasicBlock::iterator &MBBI, const DebugLoc &DL, unsigned Reg,
+    int64_t NumBytes, const TargetInstrInfo *TII, bool InPrologue) const {
+
+  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+
+  const unsigned OpcodeAHI = SystemZ::AGHI;
+  const unsigned OpcodeAFI = SystemZ::AGFI;
+  const unsigned Opcode = isInt<16>(NumBytes) ? OpcodeAHI : OpcodeAFI;
+  const unsigned Align = 32;
+
+  MachineInstr *MI = nullptr;
+  while (NumBytes) {
+    int64_t ThisVal = NumBytes;
+
+    if (Opcode == OpcodeAFI) {
+      // Make sure we maintain stack alignment.
+      int64_t MinVal = -uint64_t(1) << 31;
+      int64_t MaxVal = (int64_t(1) << 31) - Align;
+      if (ThisVal < MinVal)
+        ThisVal = MinVal;
+      else if (ThisVal > MaxVal)
+        ThisVal = MaxVal;
+    }
+
+    MI = BuildMI(MBB, MBBI, DL, TII->get(Opcode), Reg)
+             .addReg(Reg)
+             .addImm(ThisVal);
+    // Attach a symbol to the first stack-pointer update in the prologue only.
+    if (InPrologue) {
+      MCSymbol *StackPtrUpdateSymbol =
+          MF.getContext().createTempSymbol("stack_update");
+      MI->setPreInstrSymbol(MF, StackPtrUpdateSymbol);
+      ZFI->setStackUpdateSymbol(StackPtrUpdateSymbol);
+      InPrologue = false;
+    }
+
+    // The CC implicit def is dead.
+    MI->getOperand(3).setIsDead();
+    NumBytes -= ThisVal;
+  }
+  return MI;
+}
+
+uint64_t SystemZXPLINKFrameLowering::getAllocatedStackSize(
+    const MachineFunction &MF) const {
+
+  const MachineFrameInfo &MFFrame = MF.getFrameInfo();
+  const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+  auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+
+  // Start with the size of the local variables and spill slots.
+  // Subtract MaxCallFrameSize, since a fixed object is created to alias
+  // the area used for passing arguments to called functions.
+  uint64_t StackSize = MFFrame.getStackSize() - MFFrame.getMaxCallFrameSize();
+
+  // The ABI-defined 128-byte base area must be allocated whenever the
+  // function is not an XPLINK64 leaf routine.  TODO: Implement support
+  // for leaf routines.
+  if (!isXPLINKLeaf(MF))
+    StackSize += Regs.getCallFrameSize();
+
+  return StackSize;
+}
 
 void SystemZXPLINKFrameLowering::emitPrologue(MachineFunction &MF,
-                                              MachineBasicBlock &MBB) const {}
+                                              MachineBasicBlock &MBB) const {
+  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
+  MachineFrameInfo &MFFrame = MF.getFrameInfo();
+  const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  MachineModuleInfo &MMI = MF.getMMI();
+  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
+  const std::vector<CalleeSavedInfo> &CSI = MFFrame.getCalleeSavedInfo();
+  auto *ZII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+  auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+  MachineInstr *StoreInstr = nullptr;
+  MachineInstr *EndOfPrologue = nullptr;
+
+  // Debug location must be unknown since the first debug location is used
+  // to determine the end of the prologue.
+  DebugLoc DL;
+
+  // The current offset of the stack pointer from the CFA.
+  int64_t SPOffsetFromCFA = -Regs.getCFAOffsetFromInitialSP();
+  uint64_t Offset = 0;
+
+  const uint64_t StackSize = getAllocatedStackSize(MF);
+
+  if (ZFI->getSpillGPRRegs().LowGPR) {
+    // Skip over the GPR saves.
+    if (MBBI != MBB.end() && MBBI->getOpcode() == SystemZ::STMG) {
+      const int Operand = 3;
+      // Now that the stack has been finalized, we can set the displacement
+      // of the STMG.
+      Offset = Regs.getStackPointerBias() + MBBI->getOperand(Operand).getImm();
+      // Fold the stack allocation into the displacement if it fits the
+      // maximum displacement of the STMG instruction.
+      if (isInt<20>(Offset - StackSize))
+        Offset -= StackSize;
+      else
+        StoreInstr = &*MBBI;
+      MBBI->getOperand(Operand).setImm(Offset);
+      EndOfPrologue = &*MBBI;
+      ++MBBI;
+    } else
+      llvm_unreachable("Couldn't skip over GPR saves");
+
+    // Add CFI for the GPR saves.
+    for (auto &Save : CSI) {
+      unsigned Reg = Save.getReg();
+      if (SystemZ::GR64BitRegClass.contains(Reg)) {
+        int64_t Offset = SPOffsetFromCFA + RegSpillOffsets[Reg];
+        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+            nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
+        BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
+            .addCFIIndex(CFIIndex);
+      }
+    }
+  }
+
+  if (StackSize) {
+    MachineBasicBlock::iterator InsertPt = StoreInstr ? StoreInstr : MBBI;
+    // Allocate StackSize bytes.
+    int64_t Delta = -int64_t(StackSize);
+    MachineInstr *MI =
+        emitStackIncrement(MF, MBB, InsertPt, DL,
+                           Regs.getStackPointerRegister(), Delta, ZII, true);
+    if (!StoreInstr)
+      EndOfPrologue = MI;
+
+    // Add CFI for the allocation.
+    unsigned CFIIndex = MF.addFrameInst(
+        MCCFIInstruction::cfiDefCfaOffset(nullptr, SPOffsetFromCFA + Delta));
+    BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex);
+    SPOffsetFromCFA += Delta;
+  }
+
+  // Skip over the FPR/VR saves.
+  SmallVector<unsigned, 8> CFIIndexes;
+  for (auto &Save : CSI) {
+    unsigned Reg = Save.getReg();
+    if (SystemZ::FP64BitRegClass.contains(Reg) ||
+        SystemZ::VR128BitRegClass.contains(Reg)) {
+      if (MBBI != MBB.end() && (MBBI->getOpcode() == SystemZ::STD ||
+                                MBBI->getOpcode() == SystemZ::STDY ||
+                                MBBI->getOpcode() == SystemZ::VST)) {
+        EndOfPrologue = &*MBBI;
+        ++MBBI;
+      } else
+        llvm_unreachable("Couldn't skip over FPR/VR save.");
+
+      // Add CFI for this save.
+      unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
+      Register IgnoredFrameReg;
+      int Offset =
+          getFrameIndexReference(MF, Save.getFrameIdx(), IgnoredFrameReg)
+              .getFixed();
+
+      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+          nullptr, DwarfReg, SPOffsetFromCFA + Offset));
+      CFIIndexes.push_back(CFIIndex);
+    }
+  }
+
+  // Complete the CFI for the FPR/VR saves, modelling them as taking effect
+  // after the last save.
+  for (auto CFIIndex : CFIIndexes) {
+    BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex);
+  }
+
+  // Attach a symbol marking the end of the prologue, as a post-instruction
+  // symbol on the last prologue instruction.
+  if (EndOfPrologue) {
+    MCSymbol *EndOfPrologueSymbol =
+        MF.getContext().createTempSymbol("end_of_prologue");
+    EndOfPrologue->setPostInstrSymbol(MF, EndOfPrologueSymbol);
+    ZFI->setEndOfPrologueSymbol(EndOfPrologueSymbol);
+  }
+}
+
+bool SystemZXPLINKFrameLowering::restoreCalleeSavedRegisters(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
+
+  if (CSI.empty())
+    return false;
+
+  MachineFunction &MF = *MBB.getParent();
+  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+  const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  auto *Regs = static_cast<SystemZXPLINK64Registers *>(
+      Subtarget.getSpecialRegisters());
+  assert(Regs && "Unsupported SystemZ calling convention");
+
+  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+  // Restore FPRs/VRs in the normal TargetInstrInfo way.
+  for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+    unsigned Reg = CSI[I].getReg();
+    if (SystemZ::FP64BitRegClass.contains(Reg))
+      TII->loadRegFromStackSlot(MBB, MBBI, Reg, CSI[I].getFrameIdx(),
+                                &SystemZ::FP64BitRegClass, TRI);
+    if (SystemZ::VR128BitRegClass.contains(Reg))
+      TII->loadRegFromStackSlot(MBB, MBBI, Reg, CSI[I].getFrameIdx(),
+                                &SystemZ::VR128BitRegClass, TRI);
+  }
+
+  // Restore call-saved GPRs (but not call-clobbered varargs, which at
+  // this point might hold return values).
+  SystemZ::GPRRegs RestoreGPRs = ZFI->getRestoreGPRRegs();
+  if (RestoreGPRs.LowGPR) {
+    if (RestoreGPRs.LowGPR == RestoreGPRs.HighGPR)
+      // Build an LG instruction.
+      BuildMI(MBB, MBBI, DL, TII->get(SystemZ::LG), RestoreGPRs.LowGPR)
+          .addReg(Regs->getStackPointerRegister())
+          .addImm(Regs->getStackPointerBias() + RestoreGPRs.GPROffset)
+          .addReg(0);
+    else {
+      assert(RestoreGPRs.LowGPR != RestoreGPRs.HighGPR &&
+             "Should be loading multiple registers");
+
+      // Build an LMG instruction.
+      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::LMG));
+
+      // Add the explicit register operands.
+      MIB.addReg(RestoreGPRs.LowGPR, RegState::Define);
+      MIB.addReg(RestoreGPRs.HighGPR, RegState::Define);
+
+      // Add the address.
+      MIB.addReg(Regs->getStackPointerRegister());
+      MIB.addImm(Regs->getStackPointerBias() + RestoreGPRs.GPROffset);
+
+      // Do a second scan, marking the remaining registers in the range as
+      // implicitly defined by the instruction.
+      for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+        unsigned Reg = CSI[I].getReg();
+        if (Reg > RestoreGPRs.LowGPR && Reg < RestoreGPRs.HighGPR)
+          MIB.addReg(Reg, RegState::ImplicitDefine);
+      }
+    }
+  }
+
+  return true;
+}
 
 void SystemZXPLINKFrameLowering::emitEpilogue(MachineFunction &MF,
                                               MachineBasicBlock &MBB) const {}
Index: llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
===================================================================
--- llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -37,11 +37,23 @@
   bool ManipulatesSP;
   unsigned NumLocalDynamics;
 
+  // Fields set and used by XPLINK64 code generation.
+
+  /// The registers saved in the prologue.
+  BitVector SavedRegs;
+
+  /// End of prologue symbol.
+  MCSymbol *EndOfPrologueSymbol;
+
+  /// Stack pointer update symbol.
+  MCSymbol *StackUpdateSymbol;
+
 public:
   explicit SystemZMachineFunctionInfo(MachineFunction &MF)
       : VarArgsFirstGPR(0), VarArgsFirstFPR(0), VarArgsFrameIndex(0),
         RegSaveFrameIndex(0), FramePointerSaveIndex(0), ManipulatesSP(false),
-        NumLocalDynamics(0) {}
+        NumLocalDynamics(0), SavedRegs(), EndOfPrologueSymbol(nullptr),
+        StackUpdateSymbol(nullptr) {}
 
   // Get and set the first and last call-saved GPR that should be saved by
   // this function and the SP offset for the STMG.  These are 0 if no GPRs
@@ -63,6 +75,19 @@
     RestoreGPRRegs.GPROffset = Offs;
   }
 
+  // Get and set the registers that need to be saved and restored by this
+  // function.
+  const BitVector &getSavedRegs() const { return SavedRegs; }
+  void setSavedRegs(BitVector &Param) { SavedRegs = Param; }
+
+  // Get and set the end of the prologue symbol.
+  MCSymbol *getEndOfPrologueSymbol() const { return EndOfPrologueSymbol; }
+  void setEndOfPrologueSymbol(MCSymbol *Sym) { EndOfPrologueSymbol = Sym; }
+
+  // Get and set the symbol of the first instruction that modifies the SP
+  // register.
+  MCSymbol *getStackUpdateSymbol() const { return StackUpdateSymbol; }
+  void setStackUpdateSymbol(MCSymbol *Sym) { StackUpdateSymbol = Sym; }
+
   // Get and set the number of fixed (as opposed to variable) arguments
   // that are passed in GPRs to this function.
   Register getVarArgsFirstGPR() const { return VarArgsFirstGPR; }
Index: llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
===================================================================
--- llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -10,6 +10,7 @@
 #define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZREGISTERINFO_H
 
 #include "SystemZ.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 
 #define GET_REGINFO_HEADER
@@ -44,9 +45,17 @@
 /// It is abstract, all calling conventions must override and
 /// define the pure virtual member function defined in this class.
 class SystemZCallingConventionRegisters {
+
 public:
-  /// \returns the register that keeps the
-  /// return function address.
+  /// \returns the offset to the locals area.
+  virtual int getCallFrameSize() = 0;
+
+  /// \returns the current offset of the stack pointer from the CFA.
+  virtual int getCFAOffsetFromInitialSP() = 0;
+
+  typedef TargetFrameLowering::SpillSlot SpillSlotArrayType[];
+
+  /// \returns the register that keeps the return function address.
   virtual int getReturnFunctionAddressRegister() = 0;
 
   /// \returns the register that keeps the
@@ -71,13 +80,27 @@
 };
 
 /// XPLINK64 calling convention specific use registers
-/// Particular to z/OS when in 64 bit mode
+/// Particular to z/OS when using XPLINK linkage.
 class SystemZXPLINK64Registers : public SystemZCallingConventionRegisters {
 public:
+  int getCallFrameSize() override final { return 128; }
+
+  int getCFAOffsetFromInitialSP() override final { return getCallFrameSize(); }
+
+  // The XPLINK ABI-defined register save slots,
+  // relative to the incoming stack pointer.
+  static const SpillSlotArrayType SpillOffsetTable64;
+
   int getReturnFunctionAddressRegister() override final {
     return SystemZ::R7D;
   };
 
+  int getAddressOfCalleeRegister() {
+    return SystemZ::R6D;
+  }
+
+  int getStackPointerBias() { return 2048; };
+
   int getStackPointerRegister() override final { return SystemZ::R4D; };
 
   int getFramePointerRegister() override final { return SystemZ::R8D; };
@@ -95,7 +118,18 @@
 /// ELF calling convention specific use registers
 /// Particular when on zLinux in 64 bit mode
 class SystemZELFRegisters : public SystemZCallingConventionRegisters {
+
+  // The ABI-defined register save slots, relative to the incoming stack
+  // pointer.
+  static const SpillSlotArrayType SpillOffsetTable;
+
 public:
+  int getCallFrameSize() override final { return SystemZMC::ELFCallFrameSize; }
+
+  int getCFAOffsetFromInitialSP() override final {
+    return SystemZMC::ELFCFAOffsetFromInitialSP;
+  }
+
   int getReturnFunctionAddressRegister() override final {
     return SystemZ::R14D;
   };
Index: llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
===================================================================
--- llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -190,7 +190,9 @@
 
 const MCPhysReg *
 SystemZXPLINK64Registers::getCalleeSavedRegs(const MachineFunction *MF) const {
-  return CSR_SystemZ_XPLINK64_SaveList;
+  const SystemZSubtarget &Subtarget = MF->getSubtarget<SystemZSubtarget>();
+  return Subtarget.hasVector() ? CSR_SystemZ_XPLINK64_Vector_SaveList
+                               : CSR_SystemZ_XPLINK64_SaveList;
 }
 
 const MCPhysReg *
@@ -211,7 +213,9 @@
 const uint32_t *
 SystemZXPLINK64Registers::getCallPreservedMask(const MachineFunction &MF,
                                                CallingConv::ID CC) const {
-  return CSR_SystemZ_XPLINK64_RegMask;
+  const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+  return Subtarget.hasVector() ? CSR_SystemZ_XPLINK64_Vector_RegMask
+                               : CSR_SystemZ_XPLINK64_RegMask;
 }
 
 const uint32_t *
Index: llvm/lib/Target/SystemZ/SystemZSubtarget.h
===================================================================
--- llvm/lib/Target/SystemZ/SystemZSubtarget.h
+++ llvm/lib/Target/SystemZ/SystemZSubtarget.h
@@ -96,6 +96,10 @@
     return SpecialRegisters.get();
   }
 
+  template <class SR> SR &getSpecialRegisters() const {
+    return *static_cast<SR *>(getSpecialRegisters());
+  }
+
   const TargetFrameLowering *getFrameLowering() const override {
     return FrameLowering.get();
   }
Index: llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll
@@ -0,0 +1,277 @@
+; Test the generated function prologs/epilogs under XPLINK64 on z/OS
+;
+; RUN: llc < %s -mtriple=s390x-ibm-zos -mcpu=z13 | FileCheck --check-prefixes=CHECK64,CHECK %s
+
+; Test prolog/epilog for non-XPLEAF.
+
+; Small stack frame.
+; CHECK-LABEL: func0
+; CHECK64: stmg 6, 7, [[#OFFSET:]](4)
+; CHECK64: @@stack_update0:
+; The stmg instruction's displacement field must be 2064-dsa_size,
+; as per the ABI.
+; CHECK64: aghi 4, -[[#2064-OFFSET]]
+; CHECK64: @@end_of_prologue0:
+define void @func0() {
+  call i64 (i64) @fun(i64 10)
+  ret void
+}
+
+; Spill all GPR CSRs
+; CHECK-LABEL: func1
+; CHECK64: stmg 6, 15, [[#OFFSET:]](4)
+; CHECK64: @@stack_update1:
+; CHECK64: aghi 4, -[[#2064-OFFSET]]
+; CHECK64: @@end_of_prologue1:
+define void @func1(i64 *%ptr) {
+  %l01 = load volatile i64, i64 *%ptr
+  %l02 = load volatile i64, i64 *%ptr
+  %l03 = load volatile i64, i64 *%ptr
+  %l04 = load volatile i64, i64 *%ptr
+  %l05 = load volatile i64, i64 *%ptr
+  %l06 = load volatile i64, i64 *%ptr
+  %l07 = load volatile i64, i64 *%ptr
+  %l08 = load volatile i64, i64 *%ptr
+  %l09 = load volatile i64, i64 *%ptr
+  %l10 = load volatile i64, i64 *%ptr
+  %l11 = load volatile i64, i64 *%ptr
+  %l12 = load volatile i64, i64 *%ptr
+  %l13 = load volatile i64, i64 *%ptr
+  %l14 = load volatile i64, i64 *%ptr
+  %l15 = load volatile i64, i64 *%ptr
+  %add01 = add i64 %l01, %l01
+  %add02 = add i64 %l02, %add01
+  %add03 = add i64 %l03, %add02
+  %add04 = add i64 %l04, %add03
+  %add05 = add i64 %l05, %add04
+  %add06 = add i64 %l06, %add05
+  %add07 = add i64 %l07, %add06
+  %add08 = add i64 %l08, %add07
+  %add09 = add i64 %l09, %add08
+  %add10 = add i64 %l10, %add09
+  %add11 = add i64 %l11, %add10
+  %add12 = add i64 %l12, %add11
+  %add13 = add i64 %l13, %add12
+  %add14 = add i64 %l14, %add13
+  %add15 = add i64 %l15, %add14
+  store volatile i64 %add01, i64 *%ptr
+  store volatile i64 %add02, i64 *%ptr
+  store volatile i64 %add03, i64 *%ptr
+  store volatile i64 %add04, i64 *%ptr
+  store volatile i64 %add05, i64 *%ptr
+  store volatile i64 %add06, i64 *%ptr
+  store volatile i64 %add07, i64 *%ptr
+  store volatile i64 %add08, i64 *%ptr
+  store volatile i64 %add09, i64 *%ptr
+  store volatile i64 %add10, i64 *%ptr
+  store volatile i64 %add11, i64 *%ptr
+  store volatile i64 %add12, i64 *%ptr
+  store volatile i64 %add13, i64 *%ptr
+  store volatile i64 %add14, i64 *%ptr
+  store volatile i64 %add15, i64 *%ptr
+  ret void
+}
+
+; Spill all FPRs and VRs
+; CHECK-LABEL: func2
+; CHECK64: stmg 6, 7, [[#OFFSET:]](4)
+; CHECK64: @@stack_update2:
+; CHECK64: aghi 4, -[[#2064-OFFSET]]
+; CHECK64: std 15, {{[0-9]+}}(4) * 8-byte Folded Spill
+; CHECK64: std 14, {{[0-9]+}}(4) * 8-byte Folded Spill
+; CHECK64: std 13, {{[0-9]+}}(4) * 8-byte Folded Spill
+; CHECK64: std 12, {{[0-9]+}}(4) * 8-byte Folded Spill
+; CHECK64: std 11, {{[0-9]+}}(4) * 8-byte Folded Spill
+; CHECK64: std 10, {{[0-9]+}}(4) * 8-byte Folded Spill
+; CHECK64: std 9, {{[0-9]+}}(4) * 8-byte Folded Spill
+; CHECK64: std 8, {{[0-9]+}}(4) * 8-byte Folded Spill
+; CHECK64: vst 23, {{[0-9]+}}(4), 4 * 16-byte Folded Spill
+; CHECK64: vst 22, {{[0-9]+}}(4), 4 * 16-byte Folded Spill
+; CHECK64: vst 21, {{[0-9]+}}(4), 4 * 16-byte Folded Spill
+; CHECK64: vst 20, {{[0-9]+}}(4), 4 * 16-byte Folded Spill
+; CHECK64: vst 19, {{[0-9]+}}(4), 4 * 16-byte Folded Spill
+; CHECK64: vst 18, {{[0-9]+}}(4), 4 * 16-byte Folded Spill
+; CHECK64: vst 17, {{[0-9]+}}(4), 4 * 16-byte Folded Spill
+; CHECK64: vst 16, {{[0-9]+}}(4), 4 * 16-byte Folded Spill
+; CHECK64: @@end_of_prologue2:
+
+; CHECK64: ld 15, {{[0-9]+}}(4) * 8-byte Folded Reload
+; CHECK64: ld 14, {{[0-9]+}}(4) * 8-byte Folded Reload
+; CHECK64: ld 13, {{[0-9]+}}(4) * 8-byte Folded Reload
+; CHECK64: ld 12, {{[0-9]+}}(4) * 8-byte Folded Reload
+; CHECK64: ld 11, {{[0-9]+}}(4) * 8-byte Folded Reload
+; CHECK64: ld 10, {{[0-9]+}}(4) * 8-byte Folded Reload
+; CHECK64: ld 9, {{[0-9]+}}(4) * 8-byte Folded Reload
+; CHECK64: ld 8, {{[0-9]+}}(4) * 8-byte Folded Reload
+; CHECK64: vl 23, {{[0-9]+}}(4), 4 * 16-byte Folded Reload
+; CHECK64: vl 22, {{[0-9]+}}(4), 4 * 16-byte Folded Reload
+; CHECK64: vl 21, {{[0-9]+}}(4), 4 * 16-byte Folded Reload
+; CHECK64: vl 20, {{[0-9]+}}(4), 4 * 16-byte Folded Reload
+; CHECK64: vl 19, {{[0-9]+}}(4), 4 * 16-byte Folded Reload
+; CHECK64: vl 18, {{[0-9]+}}(4), 4 * 16-byte Folded Reload
+; CHECK64: vl 17, {{[0-9]+}}(4), 4 * 16-byte Folded Reload
+; CHECK64: vl 16, {{[0-9]+}}(4), 4 * 16-byte Folded Reload
+define void @func2(double *%ptr, <2 x i64> *%vec_ptr) {
+  %l00 = load volatile double, double *%ptr
+  %l01 = load volatile double, double *%ptr
+  %l02 = load volatile double, double *%ptr
+  %l03 = load volatile double, double *%ptr
+  %l04 = load volatile double, double *%ptr
+  %l05 = load volatile double, double *%ptr
+  %l06 = load volatile double, double *%ptr
+  %l07 = load volatile double, double *%ptr
+  %l08 = load volatile double, double *%ptr
+  %l09 = load volatile double, double *%ptr
+  %l10 = load volatile double, double *%ptr
+  %l11 = load volatile double, double *%ptr
+  %l12 = load volatile double, double *%ptr
+  %l13 = load volatile double, double *%ptr
+  %l14 = load volatile double, double *%ptr
+  %l15 = load volatile double, double *%ptr
+  %add00 = fadd double %l01, %l00
+  %add01 = fadd double %l01, %add00
+  %add02 = fadd double %l02, %add01
+  %add03 = fadd double %l03, %add02
+  %add04 = fadd double %l04, %add03
+  %add05 = fadd double %l05, %add04
+  %add06 = fadd double %l06, %add05
+  %add07 = fadd double %l07, %add06
+  %add08 = fadd double %l08, %add07
+  %add09 = fadd double %l09, %add08
+  %add10 = fadd double %l10, %add09
+  %add11 = fadd double %l11, %add10
+  %add12 = fadd double %l12, %add11
+  %add13 = fadd double %l13, %add12
+  %add14 = fadd double %l14, %add13
+  %add15 = fadd double %l15, %add14
+  store volatile double %add00, double *%ptr
+  store volatile double %add01, double *%ptr
+  store volatile double %add02, double *%ptr
+  store volatile double %add03, double *%ptr
+  store volatile double %add04, double *%ptr
+  store volatile double %add05, double *%ptr
+  store volatile double %add06, double *%ptr
+  store volatile double %add07, double *%ptr
+  store volatile double %add08, double *%ptr
+  store volatile double %add09, double *%ptr
+  store volatile double %add10, double *%ptr
+  store volatile double %add11, double *%ptr
+  store volatile double %add12, double *%ptr
+  store volatile double %add13, double *%ptr
+  store volatile double %add14, double *%ptr
+  store volatile double %add15, double *%ptr
+
+  %v00 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v01 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v02 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v03 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v04 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v05 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v06 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v07 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v08 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v09 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v10 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v11 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v12 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v13 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v14 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v15 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v16 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v17 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v18 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v19 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v20 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v21 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v22 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v23 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v24 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v25 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v26 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v27 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v28 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v29 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v30 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %v31 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+  %vadd00 = add <2 x i64> %v00, %v00
+  %vadd01 = add <2 x i64> %v01, %vadd00
+  %vadd02 = add <2 x i64> %v02, %vadd01
+  %vadd03 = add <2 x i64> %v03, %vadd02
+  %vadd04 = add <2 x i64> %v04, %vadd03
+  %vadd05 = add <2 x i64> %v05, %vadd04
+  %vadd06 = add <2 x i64> %v06, %vadd05
+  %vadd07 = add <2 x i64> %v07, %vadd06
+  %vadd08 = add <2 x i64> %v08, %vadd07
+  %vadd09 = add <2 x i64> %v09, %vadd08
+  %vadd10 = add <2 x i64> %v10, %vadd09
+  %vadd11 = add <2 x i64> %v11, %vadd10
+  %vadd12 = add <2 x i64> %v12, %vadd11
+  %vadd13 = add <2 x i64> %v13, %vadd12
+  %vadd14 = add <2 x i64> %v14, %vadd13
+  %vadd15 = add <2 x i64> %v15, %vadd14
+  %vadd16 = add <2 x i64> %v16, %vadd15
+  %vadd17 = add <2 x i64> %v17, %vadd16
+  %vadd18 = add <2 x i64> %v18, %vadd17
+  %vadd19 = add <2 x i64> %v19, %vadd18
+  %vadd20 = add <2 x i64> %v20, %vadd19
+  %vadd21 = add <2 x i64> %v21, %vadd20
+  %vadd22 = add <2 x i64> %v22, %vadd21
+  %vadd23 = add <2 x i64> %v23, %vadd22
+  %vadd24 = add <2 x i64> %v24, %vadd23
+  %vadd25 = add <2 x i64> %v25, %vadd24
+  %vadd26 = add <2 x i64> %v26, %vadd25
+  %vadd27 = add <2 x i64> %v27, %vadd26
+  %vadd28 = add <2 x i64> %v28, %vadd27
+  %vadd29 = add <2 x i64> %v29, %vadd28
+  %vadd30 = add <2 x i64> %v30, %vadd29
+  %vadd31 = add <2 x i64> %v31, %vadd30
+  store volatile <2 x i64> %vadd00, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd01, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd02, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd03, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd04, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd05, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd06, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd07, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd08, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd09, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd10, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd11, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd12, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd13, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd14, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd15, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd16, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd17, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd18, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd19, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd20, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd21, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd22, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd23, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd24, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd25, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd26, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd27, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd28, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd29, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd30, <2 x i64> *%vec_ptr
+  store volatile <2 x i64> %vadd31, <2 x i64> *%vec_ptr
+  ret void
+}
+
+; XPLink leaf function. This should not save any registers,
+; and it should not create its own stack frame. Therefore
+; the stack pointer should be untouched.
+; CHECK-LABEL: func3
+; CHECK-NOT: stmg
+; CHECK-NOT: @@stack_update3
+; CHECK-NOT: {{ag?hi}} 4
+; CHECK-NOT: @@end_of_prologue3
+define i64 @func3(i64 %n) {
+  %nn = mul i64 %n, %n
+  ret i64 %nn
+}
+
+declare i64 @fun(i64 %arg0)
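Note on the RegSpillOffsets encoding used above: IndexedMap value-initializes absent entries to 0, and 0x00 is a legal XPLINK save-slot offset (the R4 slot), so the SystemZXPLINKFrameLowering constructor stores each offset bit-negated; assignCalleeSavedSpillSlots decodes with a second negation, leaving -1 as the "no ABI-assigned slot" sentinel. A minimal standalone sketch of that encoding (plain arrays stand in for llvm::IndexedMap, and the register numbers are placeholders, not SystemZ register enums):

#include <cassert>

int main() {
  // Like IndexedMap's default element, every entry starts at 0.
  unsigned RegSpillOffsets[16] = {};

  // Store offsets bit-negated, as the XPLINK constructor does.
  const unsigned R4 = 4, R5 = 5; // placeholder register numbers
  RegSpillOffsets[R4] = ~0x00u;  // offset 0 stays distinguishable
  RegSpillOffsets[R5] = ~0x08u;

  // Decode as in assignCalleeSavedSpillSlots: a second negation recovers
  // the offset, and an untouched entry decodes to -1 ("no save slot").
  assert(int(~RegSpillOffsets[R4]) == 0x00);
  assert(int(~RegSpillOffsets[R5]) == 0x08);
  assert(int(~RegSpillOffsets[7]) == -1);
  return 0;
}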
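The 2064-dsa_size displacement the test checks for falls out of two constants in the patch: the XPLINK stack pointer bias of 2048 (getStackPointerBias) and the 0x10 offset of R6's save slot in XPLINKSpillOffsetTable. emitPrologue biases the STMG displacement and then folds the stack allocation into it when the result still fits the instruction's signed 20-bit displacement field. A sketch of that arithmetic, using a hypothetical DSA size:

#include <cassert>
#include <cstdint>

// Mirrors llvm::isInt<20>: does V fit a signed 20-bit displacement?
static bool fitsSigned20(int64_t V) { return V >= -(1 << 19) && V < (1 << 19); }

int main() {
  const int64_t StackPointerBias = 2048; // getStackPointerBias()
  const int64_t R6SlotOffset = 0x10;     // R6 in XPLINKSpillOffsetTable
  const int64_t StackSize = 192;         // hypothetical DSA size

  // As in emitPrologue: bias the spill offset, then fold in the stack
  // allocation if the biased displacement still fits the STMG encoding.
  int64_t Offset = StackPointerBias + R6SlotOffset; // 2064
  if (fitsSigned20(Offset - StackSize))
    Offset -= StackSize; // the "2064-dsa_size" the test expects
  assert(Offset == 2064 - StackSize);
  return 0;
}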