Index: llvm/include/llvm/CodeGen/MachineFrameInfo.h
===================================================================
--- llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -28,9 +28,14 @@
 
 /// The CalleeSavedInfo class tracks the information need to locate where a
 /// callee saved register is in the current frame.
+/// Callee saved reg can also be saved to a different register rather than
+/// on the stack by setting DstReg instead of FrameIdx.
 class CalleeSavedInfo {
   unsigned Reg;
-  int FrameIdx;
+  union {
+    int FrameIdx;
+    unsigned DstReg;
+  };
   /// Flag indicating whether the register is actually restored in the epilog.
   /// In most cases, if a register is saved, it is also restored. There are
   /// some situations, though, when this is not the case. For example, the
@@ -44,17 +49,29 @@
   /// by implicit uses on the return instructions, however, the required
   /// changes in the ARM backend would be quite extensive.
   bool Restored;
+  /// Flag indicating whether the register is spilled to stack or another
+  /// register.
+  bool SpilledToReg;
 
 public:
   explicit CalleeSavedInfo(unsigned R, int FI = 0)
-  : Reg(R), FrameIdx(FI), Restored(true) {}
+  : Reg(R), FrameIdx(FI), Restored(true), SpilledToReg(false) {}
 
   // Accessors.
   unsigned getReg() const { return Reg; }
   int getFrameIdx() const { return FrameIdx; }
-  void setFrameIdx(int FI) { FrameIdx = FI; }
+  unsigned getDstReg() const { return DstReg; }
+  void setFrameIdx(int FI) {
+    FrameIdx = FI;
+    SpilledToReg = false;
+  }
+  void setDstReg(unsigned SpillReg) {
+    DstReg = SpillReg;
+    SpilledToReg = true;
+  }
   bool isRestored() const { return Restored; }
   void setRestored(bool R) { Restored = R; }
+  bool isSpilledToReg() const { return SpilledToReg; }
 };
 
 /// The MachineFrameInfo class represents an abstract stack frame until
@@ -267,9 +284,9 @@
   unsigned MaxCallFrameSize = ~0u;
 
   /// The prolog/epilog code inserter fills in this vector with each
-  /// callee saved register saved in the frame. Beyond its use by the prolog/
-  /// epilog code inserter, this data used for debug info and exception
-  /// handling.
+  /// callee saved register saved in either the frame or a different
+  /// register. Beyond its use by the prolog/epilog code inserter,
+  /// this data is used for debug info and exception handling.
   std::vector<CalleeSavedInfo> CSInfo;
 
   /// Has CSInfo been set yet?
Index: llvm/lib/CodeGen/MIRPrinter.cpp
===================================================================
--- llvm/lib/CodeGen/MIRPrinter.cpp
+++ llvm/lib/CodeGen/MIRPrinter.cpp
@@ -398,18 +398,20 @@
   for (const auto &CSInfo : MFI.getCalleeSavedInfo()) {
     yaml::StringValue Reg;
     printRegMIR(CSInfo.getReg(), Reg, TRI);
-    auto StackObjectInfo = StackObjectOperandMapping.find(CSInfo.getFrameIdx());
-    assert(StackObjectInfo != StackObjectOperandMapping.end() &&
-           "Invalid stack object index");
-    const FrameIndexOperand &StackObject = StackObjectInfo->second;
-    if (StackObject.IsFixed) {
-      YMF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg;
-      YMF.FixedStackObjects[StackObject.ID].CalleeSavedRestored =
-        CSInfo.isRestored();
-    } else {
-      YMF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg;
-      YMF.StackObjects[StackObject.ID].CalleeSavedRestored =
-        CSInfo.isRestored();
+    if (!CSInfo.isSpilledToReg()) {
+      auto StackObjectInfo = StackObjectOperandMapping.find(CSInfo.getFrameIdx());
+      assert(StackObjectInfo != StackObjectOperandMapping.end() &&
+             "Invalid stack object index");
+      const FrameIndexOperand &StackObject = StackObjectInfo->second;
+      if (StackObject.IsFixed) {
+        YMF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg;
+        YMF.FixedStackObjects[StackObject.ID].CalleeSavedRestored =
+          CSInfo.isRestored();
+      } else {
+        YMF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg;
+        YMF.StackObjects[StackObject.ID].CalleeSavedRestored =
+          CSInfo.isRestored();
+      }
     }
   }
   for (unsigned I = 0, E = MFI.getLocalFrameObjectCount(); I < E; ++I) {
Index: llvm/lib/CodeGen/PrologEpilogInserter.cpp
===================================================================
--- llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -75,6 +75,10 @@
 
 using MBBVector = SmallVector<MachineBasicBlock *, 4>;
 
+STATISTIC(NumLeafFuncWithSpills, "Number of leaf functions with CSRs");
+STATISTIC(NumFuncSeen, "Number of functions seen in PEI");
+
+
 namespace {
 
 class PEI : public MachineFunctionPass {
@@ -168,6 +172,7 @@
 /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
 /// frame indexes with appropriate references.
 bool PEI::runOnMachineFunction(MachineFunction &MF) {
+  NumFuncSeen++;
   const Function &F = MF.getFunction();
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
@@ -357,6 +362,11 @@
     // Now that we know which registers need to be saved and restored, allocate
     // stack slots for them.
     for (auto &CS : CSI) {
+      // If the target has spilled this register to another register, we don't
+      // need to allocate a stack slot.
+      if (CS.isSpilledToReg())
+        continue;
+
       unsigned Reg = CS.getReg();
       const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
 
@@ -454,7 +464,22 @@
       if (!MRI.isReserved(Reg) && !MBB->isLiveIn(Reg))
         MBB->addLiveIn(Reg);
     }
+    // If callee-saved register is spilled to another register rather than
+    // spilling to stack, the destination register has to be marked as live for
+    // each MBB between the prologue and epilogue so that it is not clobbered
+    // before it is reloaded in the epilogue. The Visited set contains all
+    // blocks outside of the region delimited by prologue/epilogue.
+    if (CSI[i].isSpilledToReg()) {
+      for (MachineBasicBlock &MBB : MF) {
+        if (Visited.count(&MBB))
+          continue;
+        MCPhysReg DstReg = CSI[i].getDstReg();
+        if (!MBB.isLiveIn(DstReg))
+          MBB.addLiveIn(DstReg);
+      }
+    }
   }
+
 }
 
 /// Insert restore code for the callee-saved registers used in the function.
@@ -530,6 +555,9 @@
 
     std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
     if (!CSI.empty()) {
+      if (!MFI.hasCalls())
+        NumLeafFuncWithSpills++;
+
       for (MachineBasicBlock *SaveBlock : SaveBlocks) {
         insertCSRSaves(*SaveBlock, CSI);
         // Update the live-in information of all the blocks up to the save
Index: llvm/lib/Target/PowerPC/PPCFrameLowering.h
===================================================================
--- llvm/lib/Target/PowerPC/PPCFrameLowering.h
+++ llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -99,6 +99,13 @@
                                  MachineBasicBlock::iterator MI,
                                  const std::vector<CalleeSavedInfo> &CSI,
                                  const TargetRegisterInfo *TRI) const override;
+  /// This function will assign callee saved gprs to volatile vector registers
+  /// for prologue spills when applicable. It returns false if there are any
+  /// registers which were not spilled to volatile vector registers.
+  bool
+  assignCalleeSavedSpillSlots(MachineFunction &MF,
+                              const TargetRegisterInfo *TRI,
+                              std::vector<CalleeSavedInfo> &CSI) const override;
 
   MachineBasicBlock::iterator
   eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
Index: llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -17,6 +17,7 @@
 #include "PPCMachineFunctionInfo.h"
 #include "PPCSubtarget.h"
 #include "PPCTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -28,6 +29,16 @@
 
 using namespace llvm;
 
+#define DEBUG_TYPE "framelowering"
+STATISTIC(NumNoNeedForFrame, "Number of functions without frames");
+STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
+STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
+
+static cl::opt<bool>
+EnablePEVectorSpills("ppc-enable-pe-vector-spills",
+                     cl::desc("Enable spills in prologue to vector registers."),
+                     cl::init(false), cl::Hidden);
+
 /// VRRegNo - Map from a numbered VR register to its enum value.
 ///
 static const MCPhysReg VRRegNo[] = {
@@ -466,6 +477,7 @@
 
   // Check whether we can skip adjusting the stack pointer (by using red zone)
   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
+    NumNoNeedForFrame++;
     // No need for frame
     if (UpdateMF)
       MFI.setStackSize(0);
@@ -1213,11 +1225,20 @@
           continue;
         }
 
-        int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
-        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
-            nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
-        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
-            .addCFIIndex(CFIIndex);
+        if (CSI[I].isSpilledToReg()) {
+          unsigned SpilledReg = CSI[I].getDstReg();
+          unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
+              nullptr, MRI->getDwarfRegNum(Reg, true),
+              MRI->getDwarfRegNum(SpilledReg, true)));
+          BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+            .addCFIIndex(CFIRegister);
+        } else {
+          int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
+          unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+              nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
+          BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+              .addCFIIndex(CFIIndex);
+        }
       }
     }
   }
@@ -1822,17 +1843,19 @@
   // Move general register save area spill slots down, taking into account
   // the size of the Floating-point register save area.
   for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
-    int FI = GPRegs[i].getFrameIdx();
-
-    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+    if (!GPRegs[i].isSpilledToReg()) {
+      int FI = GPRegs[i].getFrameIdx();
+      MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+    }
   }
 
   // Move general register save area spill slots down, taking into account
   // the size of the Floating-point register save area.
   for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
-    int FI = G8Regs[i].getFrameIdx();
-
-    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+    if (!G8Regs[i].isSpilledToReg()) {
+      int FI = G8Regs[i].getFrameIdx();
+      MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+    }
   }
 
   unsigned MinReg =
@@ -1947,6 +1970,64 @@
   }
 }
 
+// This function checks if a callee saved gpr can be spilled to a volatile
+// vector register. This occurs for leaf functions when the option
+// ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
+// which were not spilled to vectors, return false so the target independent
+// code can handle them by assigning a FrameIdx to a stack slot.
+bool PPCFrameLowering::assignCalleeSavedSpillSlots(
+    MachineFunction &MF, const TargetRegisterInfo *TRI,
+    std::vector<CalleeSavedInfo> &CSI) const {
+
+  if (CSI.empty())
+    return true; // Early exit if no callee saved registers are modified!
+
+  // Early exit if cannot spill gprs to volatile vector registers.
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
+    return false;
+
+  // Build a BitVector of VSRs that can be used for spilling GPRs.
+  BitVector BVAllocatable = TRI->getAllocatableSet(MF);
+  BitVector BVCalleeSaved(TRI->getNumRegs());
+  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+  for (unsigned i = 0; CSRegs[i]; ++i)
+    BVCalleeSaved.set(CSRegs[i]);
+
+  for (unsigned Reg : BVAllocatable.set_bits()) {
+    // Set to 0 if the register is not a volatile VF/F8 register, or if it is
+    // used in the function.
+    if (BVCalleeSaved[Reg] ||
+        (!PPC::F8RCRegClass.contains(Reg) &&
+         !PPC::VFRCRegClass.contains(Reg)) ||
+        (MF.getRegInfo().isPhysRegUsed(Reg)))
+      BVAllocatable.reset(Reg);
+  }
+
+  bool AllSpilledToReg = true;
+  for (auto &CS : CSI) {
+    if (BVAllocatable.none())
+      return false;
+
+    unsigned Reg = CS.getReg();
+    if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
+      AllSpilledToReg = false;
+      continue;
+    }
+
+    unsigned VolatileVFReg = BVAllocatable.find_first();
+    if (VolatileVFReg < BVAllocatable.size()) {
+      CS.setDstReg(VolatileVFReg);
+      BVAllocatable.reset(VolatileVFReg);
+    } else {
+      AllSpilledToReg = false;
+    }
+  }
+  return AllSpilledToReg;
+}
+
+
 bool
 PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
@@ -2012,12 +2093,18 @@
                                          CSI[i].getFrameIdx()));
       }
     } else {
-      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-      // Use !IsLiveIn for the kill flag.
-      // We do not want to kill registers that are live in this function
-      // before their use because they will become undefined registers.
-      TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn,
-                              CSI[i].getFrameIdx(), RC, TRI);
+      if (CSI[i].isSpilledToReg()) {
+        NumPESpillVSR++;
+        BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
+          .addReg(Reg, getKillRegState(true)); // NOTE(review): always killed, while the stack path passes !IsLiveIn -- confirm
+      } else {
+        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+        // Use !IsLiveIn for the kill flag.
+        // We do not want to kill registers that are live in this function
+        // before their use because they will become undefined registers.
+        TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn,
+                                CSI[i].getFrameIdx(), RC, TRI);
+      }
     }
   }
   return true;
@@ -2157,13 +2244,19 @@
       CR2Spilled = CR3Spilled = CR4Spilled = false;
     }
 
-    // Default behavior for non-CR saves.
-    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-    TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(),
-                             RC, TRI);
-    assert(I != MBB.begin() &&
-           "loadRegFromStackSlot didn't insert any code!");
+    if (CSI[i].isSpilledToReg()) {
+      DebugLoc DL;
+      NumPEReloadVSR++;
+      BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
+          .addReg(CSI[i].getDstReg(), getKillRegState(true));
+    } else {
+      // Default behavior for non-CR saves: reload from the stack slot.
+      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+      TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
+      assert(I != MBB.begin() &&
+             "loadRegFromStackSlot didn't insert any code!");
   }
+  }
 
   // Insert in reverse order.
   if (AtStart)
Index: llvm/test/CodeGen/MIR/PowerPC/prolog_vec_spills.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/MIR/PowerPC/prolog_vec_spills.mir
@@ -0,0 +1,62 @@
+# RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -run-pass=prologepilog -ppc-enable-pe-vector-spills %s -o - | FileCheck %s
+
+---
+name:            test1BB
+alignment:       4
+tracksRegLiveness: true
+liveins:
+body:             |
+  bb.0.entry:
+    $r14 = IMPLICIT_DEF
+    $r15 = IMPLICIT_DEF
+    $r16 = IMPLICIT_DEF
+    $f0 = IMPLICIT_DEF
+    $v20 = IMPLICIT_DEF
+    BLR8 implicit undef $lr8, implicit undef $rm
+
+# CHECK-LABEL: name: test1BB
+# CHECK: body: |
+# CHECK: $f1 = MTVSRD killed $x14
+# CHECK-NEXT: $f2 = MTVSRD killed $x15
+# CHECK-NEXT: $f3 = MTVSRD killed $x16
+# CHECK: $x16 = MFVSRD killed $f3
+# CHECK-NEXT: $x15 = MFVSRD killed $f2
+# CHECK-NEXT: $x14 = MFVSRD killed $f1
+...
+
+---
+name:            test2BBs
+alignment:       4
+tracksRegLiveness: true
+liveins:
+body:             |
+  bb.0.entry:
+    successors: %bb.1, %bb.2
+
+    $cr0 = IMPLICIT_DEF
+    BCC 4, killed renamable $cr0, %bb.2
+    B %bb.1
+
+  bb.1:
+    $r14 = IMPLICIT_DEF
+    $r15 = IMPLICIT_DEF
+    $r16 = IMPLICIT_DEF
+    $r3 = IMPLICIT_DEF
+    B %bb.3
+
+  bb.2:
+    liveins: $x3
+    $r3 = IMPLICIT_DEF
+
+  bb.3:
+    BLR8 implicit undef $lr8, implicit undef $rm
+
+# CHECK-LABEL: name: test2BBs
+# CHECK: body: |
+# CHECK: $f0 = MTVSRD killed $x14
+# CHECK-NEXT: $f1 = MTVSRD killed $x15
+# CHECK-NEXT: $f2 = MTVSRD killed $x16
+# CHECK: $x16 = MFVSRD killed $f2
+# CHECK-NEXT: $x15 = MFVSRD killed $f1
+# CHECK-NEXT: $x14 = MFVSRD killed $f0
+...