Index: include/llvm/CodeGen/MachineFrameInfo.h
===================================================================
--- include/llvm/CodeGen/MachineFrameInfo.h
+++ include/llvm/CodeGen/MachineFrameInfo.h
@@ -28,9 +28,12 @@
 
 /// The CalleeSavedInfo class tracks the information need to locate where a
 /// callee saved register is in the current frame.
+/// Callee saved reg can also be saved to a different register rather than
+/// on the stack by setting DstReg instead of FrameIdx.
 class CalleeSavedInfo {
   unsigned Reg;
   int FrameIdx;
+  unsigned DstReg;
   /// Flag indicating whether the register is actually restored in the epilog.
   /// In most cases, if a register is saved, it is also restored. There are
   /// some situations, though, when this is not the case. For example, the
@@ -44,17 +47,24 @@
   /// by implicit uses on the return instructions, however, the required
   /// changes in the ARM backend would be quite extensive.
   bool Restored;
+  /// True if the register is saved to another register (DstReg) rather than
+  /// to a stack slot (FrameIdx).
+  bool SpilledToReg;
 
 public:
   explicit CalleeSavedInfo(unsigned R, int FI = 0)
-  : Reg(R), FrameIdx(FI), Restored(true) {}
+      : Reg(R), FrameIdx(FI), DstReg(0), Restored(true), SpilledToReg(false) {}
 
   // Accessors.
   unsigned getReg() const { return Reg; }
   int getFrameIdx() const { return FrameIdx; }
+  unsigned getDstReg() const { return DstReg; }
   void setFrameIdx(int FI) { FrameIdx = FI; }
+  void setDstReg(unsigned SpillReg) { DstReg = SpillReg; }
   bool isRestored() const { return Restored; }
   void setRestored(bool R) { Restored = R; }
+  bool isSpilledToReg() const { return SpilledToReg; }
+  void setSpilledToReg(bool R) { SpilledToReg = R; }
 };
 
 /// The MachineFrameInfo class represents an abstract stack frame until
Index: lib/CodeGen/PrologEpilogInserter.cpp
===================================================================
--- lib/CodeGen/PrologEpilogInserter.cpp
+++ lib/CodeGen/PrologEpilogInserter.cpp
@@ -81,6 +81,10 @@
                                    unsigned &MaxCXFrameIndex,
                                    const MBBVector &SaveBlocks,
                                    const MBBVector &RestoreBlocks);
+STATISTIC(NumLeafFuncWithSpills,
+          "Number of leaf functions which have CSRs to spill in prologue");
+STATISTIC(NumFuncSeen,
+          "Number of functions seen in PEI");
 
 namespace {
 
@@ -176,6 +180,7 @@
 /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
 /// frame indexes with appropriate references.
 bool PEI::runOnMachineFunction(MachineFunction &Fn) {
+  NumFuncSeen++;
   const Function* F = Fn.getFunction();
   const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
   const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
@@ -537,6 +542,9 @@
 
     std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
     if (!CSI.empty()) {
+      if (!MFI.hasCalls())
+        NumLeafFuncWithSpills++;
+
       for (MachineBasicBlock *SaveBlock : SaveBlocks) {
         insertCSRSaves(*SaveBlock, CSI);
         // Update the live-in information of all the blocks up to the save
Index: lib/Target/PowerPC/PPCFrameLowering.h
===================================================================
--- lib/Target/PowerPC/PPCFrameLowering.h
+++ lib/Target/PowerPC/PPCFrameLowering.h
@@ -99,6 +99,13 @@
                                   MachineBasicBlock::iterator MI,
                                   const std::vector<CalleeSavedInfo> &CSI,
                                   const TargetRegisterInfo *TRI) const override;
+  /// This function will assign callee saved gprs to volatile vector registers
+  /// for prologue spills when applicable. If it cannot spill to a volatile
+  /// vector register, it will assign a stack frame index.
+  bool
+  assignCalleeSavedSpillSlots(MachineFunction &MF,
+                              const TargetRegisterInfo *TRI,
+                              std::vector<CalleeSavedInfo> &CSI) const override;
 
   MachineBasicBlock::iterator
   eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
Index: lib/Target/PowerPC/PPCFrameLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCFrameLowering.cpp
+++ lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -17,6 +17,7 @@
 #include "PPCMachineFunctionInfo.h"
 #include "PPCSubtarget.h"
 #include "PPCTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -28,8 +29,27 @@
 
 using namespace llvm;
 
+#define DEBUG_TYPE "framelowering"
+STATISTIC(NumNoNeedForFrame, "Number of functions without frames");
+STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
+STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
+
+static cl::opt<bool> EnablePEVectorSpills(
+    "ppc-enable-pe-vector-spills",
+    cl::desc("Enable spills in prologue to vector registers."),
+    cl::init(false), cl::Hidden);
+
+/// VolatileVFRegNo - Volatile floating-point/vector registers that are
+/// candidates for holding a callee-saved GPR spilled in the prologue.
+static const MCPhysReg VolatileVFRegNo[] = {
+    PPC::F0, PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6,
+    PPC::F7, PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13,
+    PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3, PPC::VF4, PPC::VF5, PPC::VF6,
+    PPC::VF7, PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11, PPC::VF12, PPC::VF13,
+    PPC::VF14, PPC::VF15, PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19};
+
 /// VRRegNo - Map from a numbered VR register to its enum value.
 ///
 static const MCPhysReg VRRegNo[] = {
  PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
  PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
@@ -446,6 +466,7 @@
 
   // Check whether we can skip adjusting the stack pointer (by using red zone)
   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
+    NumNoNeedForFrame++;
     // No need for frame
     if (UpdateMF)
       MFI.setStackSize(0);
@@ -1798,17 +1819,19 @@
   // Move general register save area spill slots down, taking into account
   // the size of the Floating-point register save area.
   for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
-    int FI = GPRegs[i].getFrameIdx();
-
-    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+    if (!GPRegs[i].isSpilledToReg()) {
+      int FI = GPRegs[i].getFrameIdx();
+      MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+    }
   }
 
   // Move general register save area spill slots down, taking into account
   // the size of the Floating-point register save area.
   for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
-    int FI = G8Regs[i].getFrameIdx();
-
-    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+    if (!G8Regs[i].isSpilledToReg()) {
+      int FI = G8Regs[i].getFrameIdx();
+      MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+    }
   }
 
   unsigned MinReg =
@@ -1921,6 +1944,101 @@
   }
 }
 
+/// Decide where each callee-saved register in \p CSI gets saved.
+///
+/// If -ppc-enable-pe-vector-spills is set, the subtarget has P9Vector, and the
+/// function makes no calls and needs no CFI, each callee-saved GPR is given an
+/// unused register from VolatileVFRegNo as its spill destination while one is
+/// available. Every other CSR is assigned a stack frame index.
+///
+/// \returns false, without assigning anything, when spilling to registers is
+/// not applicable; true once every entry of \p CSI has a destination.
+bool PPCFrameLowering::assignCalleeSavedSpillSlots(
+    MachineFunction &MF, const TargetRegisterInfo *TRI,
+    std::vector<CalleeSavedInfo> &CSI) const {
+
+  if (CSI.empty())
+    return true; // Early exit if no callee saved registers are modified!
+
+  MachineModuleInfo &MMI = MF.getMMI();
+  bool needsCFI = MMI.hasDebugInfo() ||
+    MF.getFunction()->needsUnwindTableEntry();
+
+  // Early exit if cannot spill gprs to volatile vector registers.
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  if (!EnablePEVectorSpills || MFI.hasCalls() || needsCFI ||
+      !Subtarget.hasP9Vector())
+    return false;
+
+  unsigned NumFixedSpillSlots;
+  const PPCFrameLowering::SpillSlot *FixedSpillSlots =
+      getCalleeSavedSpillSlots(NumFixedSpillSlots);
+
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  const unsigned NumVolatileVFRegs = array_lengthof(VolatileVFRegNo);
+
+  // Index of the next VolatileVFRegNo entry to try; earlier entries were
+  // either already handed out or found to be in use.
+  unsigned VolatileVFRegNoIdx = 0;
+  for (auto &CS : CSI) {
+    unsigned Reg = CS.getReg();
+
+    // Check if this CSR can be spilled to a volatile vector register
+    if (PPC::G8RCRegClass.contains(Reg) || PPC::GPRCRegClass.contains(Reg)) {
+      for (unsigned i = VolatileVFRegNoIdx; i != NumVolatileVFRegs; ++i) {
+        if (MRI.isPhysRegUsed(VolatileVFRegNo[i]))
+          continue;
+
+        VolatileVFRegNoIdx = i + 1;
+        CS.setSpilledToReg(true);
+        CS.setDstReg(VolatileVFRegNo[i]);
+        // Mark the chosen register live-in to every block of the function.
+        for (MachineBasicBlock &BB : MF)
+          BB.addLiveIn(VolatileVFRegNo[i]);
+        break;
+      }
+    }
+
+    // If CSR not spilled to volatile vector register, assign a frame index.
+    if (CS.isSpilledToReg())
+      continue;
+
+    int FrameIdx;
+    if (TRI->hasReservedSpillSlot(MF, Reg, FrameIdx)) {
+      CS.setFrameIdx(FrameIdx);
+      continue;
+    }
+
+    // Check to see if this physreg must be spilled to a particular stack
+    // slot.
+    const PPCFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
+    while (FixedSlot != FixedSpillSlots + NumFixedSpillSlots &&
+           FixedSlot->Reg != Reg)
+      ++FixedSlot;
+
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+    unsigned Size = TRI->getSpillSize(*RC);
+    if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
+      // Nope, just spill it anywhere convenient.
+      unsigned Align = TRI->getSpillAlignment(*RC);
+      unsigned StackAlign = getStackAlignment();
+
+      // We may not be able to satisfy the desired alignment specification of
+      // the TargetRegisterClass if the stack alignment is smaller. Use the
+      // min.
+      Align = std::min(Align, StackAlign);
+      FrameIdx = MFI.CreateStackObject(Size, Align, true);
+    } else {
+      // Spill it to the stack where we must.
+      FrameIdx = MFI.CreateFixedSpillStackObject(Size, FixedSlot->Offset);
+    }
+
+    CS.setFrameIdx(FrameIdx);
+  }
+
+  return true;
+}
+
 bool
 PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
@@ -1979,9 +2097,15 @@
                                          CSI[i].getFrameIdx()));
       }
     } else {
-      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-      TII.storeRegToStackSlot(MBB, MI, Reg, true,
-                              CSI[i].getFrameIdx(), RC, TRI);
+      if (CSI[i].isSpilledToReg()) {
+        NumPESpillVSR++;
+        BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
+            .addReg(Reg, getKillRegState(true));
+      } else {
+        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+        TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RC,
+                                TRI);
+      }
     }
   }
   return true;
@@ -2121,13 +2245,19 @@
         CR2Spilled = CR3Spilled = CR4Spilled = false;
       }
 
-      // Default behavior for non-CR saves.
-      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-      TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(),
-                               RC, TRI);
-      assert(I != MBB.begin() &&
-             "loadRegFromStackSlot didn't insert any code!");
+      if (CSI[i].isSpilledToReg()) {
+        DebugLoc DL;
+        NumPEReloadVSR++;
+        BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
+            .addReg(CSI[i].getDstReg(), getKillRegState(true));
+      } else {
+        // Default behavior for non-CR saves.
+        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+        TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
+        assert(I != MBB.begin() &&
+               "loadRegFromStackSlot didn't insert any code!");
       }
+    }
 
     // Insert in reverse order.
     if (AtStart)
Index: test/CodeGen/PowerPC/prolog_vec_spills.ll
===================================================================
--- /dev/null
+++ test/CodeGen/PowerPC/prolog_vec_spills.ll
@@ -0,0 +1,25 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -ppc-enable-pe-vector-spills -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK
+
+define signext i32 @test1(i32 signext %a, i32 signext %b) #0 {
+entry:
+; CHECK: mtvsrd [[REG1:[0-9]+]], 14
+; CHECK: mtvsrd [[REG2:[0-9]+]], 15
+; CHECK: mtvsrd [[REG3:[0-9]+]], 16
+; CHECK: mffprd 16, [[REG3]]
+; CHECK: mffprd 15, [[REG2]]
+; CHECK: mffprd 14, [[REG1]]
+  %a.addr = alloca i32, align 4
+  %b.addr = alloca i32, align 4
+  %dst = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 %b, i32* %b.addr, align 4
+  call void asm sideeffect "", "~{v20},~{f14},~{f0}"()
+  %0 = load i32, i32* %a.addr, align 4
+  %1 = load i32, i32* %b.addr, align 4
+  %2 = call i32 asm "add $0, $1, $2", "=r,r,r,~{r14},~{r15},~{r16}"(i32 %0, i32 %1)
+  store i32 %2, i32* %dst, align 4
+  %3 = load i32, i32* %dst, align 4
+  ret i32 %3
+}
+
+attributes #0 = { nounwind }