diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/llvm/lib/Target/PowerPC/PPCFrameLowering.h
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -28,6 +28,10 @@
   const unsigned BasePointerSaveOffset;
   const unsigned CRSaveOffset;
 
+  // Used to record the mapping of VSRs to spilled callee saved GPRs,
+  // where one VSR can be used to save two GPRs.
+  mutable std::map<unsigned, SmallVector<unsigned, 2>> VSRToGPRs;
+
   /**
    * Find register[s] that can be used in function prologue and epilogue
    *
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -2093,30 +2093,39 @@
     BVCalleeSaved.set(CSRegs[i]);
 
   for (unsigned Reg : BVAllocatable.set_bits()) {
-    // Set to 0 if the register is not a volatile VF/F8 register, or if it is
+    // Set to 0 if the register is not a volatile VSX register, or if it is
    // used in the function.
-    if (BVCalleeSaved[Reg] ||
-        (!PPC::F8RCRegClass.contains(Reg) &&
-         !PPC::VFRCRegClass.contains(Reg)) ||
-        (MF.getRegInfo().isPhysRegUsed(Reg)))
+    if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) ||
+        MF.getRegInfo().isPhysRegUsed(Reg))
       BVAllocatable.reset(Reg);
   }
 
   bool AllSpilledToReg = true;
+  unsigned LastSpilledVSR = 0;
   for (auto &CS : CSI) {
     if (BVAllocatable.none())
       return false;
 
     unsigned Reg = CS.getReg();
-    if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
+
+    if (!PPC::G8RCRegClass.contains(Reg)) {
       AllSpilledToReg = false;
       continue;
     }
 
+    // For P9, we can reuse LastSpilledVSR to spill two GPRs
+    // into one VSR using the mtvsrdd instruction.
+    if (LastSpilledVSR != 0) {
+      CS.setDstReg(LastSpilledVSR);
+      BVAllocatable.reset(LastSpilledVSR);
+      LastSpilledVSR = 0;
+      continue;
+    }
+
     unsigned VolatileVFReg = BVAllocatable.find_first();
     if (VolatileVFReg < BVAllocatable.size()) {
       CS.setDstReg(VolatileVFReg);
-      BVAllocatable.reset(VolatileVFReg);
+      LastSpilledVSR = VolatileVFReg;
     } else {
       AllSpilledToReg = false;
     }
@@ -2135,6 +2144,22 @@
   DebugLoc DL;
   bool CRSpilled = false;
   MachineInstrBuilder CRMIB;
+  BitVector HasSpilledBV(TRI->getNumRegs());
+
+  VSRToGPRs.clear();
+
+  // Populate the map of the destination VSR
+  // and their corresponding spilled GPRs.
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    if (!CSI[i].isSpilledToReg())
+      continue;
+
+    unsigned Dst = CSI[i].getDstReg();
+    if (VSRToGPRs.find(Dst) == VSRToGPRs.end())
+      VSRToGPRs[Dst] = SmallVector<unsigned, 2>();
+
+    VSRToGPRs[Dst].push_back(CSI[i].getReg());
+  }
 
   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
     unsigned Reg = CSI[i].getReg();
@@ -2187,9 +2212,29 @@
       }
     } else {
       if (CSI[i].isSpilledToReg()) {
-        NumPESpillVSR++;
-        BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
-            .addReg(Reg, getKillRegState(true));
+        unsigned Dst = CSI[i].getDstReg();
+
+        if (HasSpilledBV[Dst])
+          continue;
+
+        if (VSRToGPRs[Dst].size() == 2) {
+          assert(Subtarget.hasP9Vector());
+
+          NumPESpillVSR += 2;
+          BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRDD), Dst)
+              .addReg(VSRToGPRs[Dst][0], getKillRegState(true))
+              .addReg(VSRToGPRs[Dst][1], getKillRegState(true));
+        } else if (VSRToGPRs[Dst].size() == 1) {
+          assert(Subtarget.hasP8Vector());
+
+          NumPESpillVSR++;
+          BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD),
+                  TRI->getSubReg(Dst, PPC::sub_64))
+              .addReg(VSRToGPRs[Dst][0], getKillRegState(true));
+        } else {
+          llvm_unreachable("Incorrect size of VSRToGPRs!");
+        }
+        HasSpilledBV.set(Dst);
       } else {
         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
         // Use !IsLiveIn for the kill flag.
@@ -2302,6 +2347,8 @@ if (!AtStart) --BeforeI; + BitVector HasRestoredBV(TRI->getNumRegs()); + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); @@ -2341,9 +2388,29 @@ if (CSI[i].isSpilledToReg()) { DebugLoc DL; - NumPEReloadVSR++; - BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg) - .addReg(CSI[i].getDstReg(), getKillRegState(true)); + unsigned Dst = CSI[i].getDstReg(); + + if (HasRestoredBV[Dst]) + continue; + + if (VSRToGPRs[Dst].size() == 2) { + assert(Subtarget.hasP9Vector()); + NumPEReloadVSR += 2; + BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), VSRToGPRs[Dst][0]) + .addReg(TRI->getSubReg(Dst, PPC::sub_64)); + BuildMI(MBB, I, DL, TII.get(PPC::MFVSRLD), VSRToGPRs[Dst][1]) + .addReg(Dst, getKillRegState(true)); + } else if (VSRToGPRs[Dst].size() == 1) { + assert(Subtarget.hasP8Vector()); + NumPEReloadVSR++; + BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), VSRToGPRs[Dst][0]) + .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true)); + } else { + llvm_unreachable("Incorrect size of VSRToGPRs!"); + } + + HasRestoredBV.set(Dst); + } else { // Default behavior for non-CR saves. 
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); diff --git a/llvm/test/CodeGen/PowerPC/prolog_vec_spills.mir b/llvm/test/CodeGen/PowerPC/prolog_vec_spills.mir --- a/llvm/test/CodeGen/PowerPC/prolog_vec_spills.mir +++ b/llvm/test/CodeGen/PowerPC/prolog_vec_spills.mir @@ -14,14 +14,14 @@ $v20 = IMPLICIT_DEF BLR8 implicit undef $lr8, implicit undef $rm +# Use mtvsrdd to save two GPRs in a single instruction # CHECK-LABEL: name: test1BB # CHECK: body: | -# CHECK: $f1 = MTVSRD killed $x14 -# CHECK-NEXT: $f2 = MTVSRD killed $x15 -# CHECK-NEXT: $f3 = MTVSRD killed $x16 -# CHECK: $x16 = MFVSRD killed $f3 -# CHECK-NEXT: $x15 = MFVSRD killed $f2 -# CHECK-NEXT: $x14 = MFVSRD killed $f1 +# CHECK: $v0 = MTVSRDD killed $x14, killed $x15 +# CHECK-NEXT: $vf1 = MTVSRD killed $x16 +# CHECK: $x16 = MFVSRD killed $vf1 +# CHECK-NEXT: $x14 = MFVSRD $vf0 +# CHECK-NEXT: $x15 = MFVSRLD killed $v0 ... --- @@ -52,10 +52,9 @@ # CHECK-LABEL: name: test2BB # CHECK: body: | -# CHECK: $f0 = MTVSRD killed $x14 -# CHECK-NEXT: $f1 = MTVSRD killed $x15 -# CHECK-NEXT: $f2 = MTVSRD killed $x16 -# CHECK: $x16 = MFVSRD killed $f2 -# CHECK-NEXT: $x15 = MFVSRD killed $f1 -# CHECK-NEXT: $x14 = MFVSRD killed $f0 +# CHECK: $v0 = MTVSRDD killed $x14, killed $x15 +# CHECK-NEXT: $vf1 = MTVSRD killed $x16 +# CHECK: $x16 = MFVSRD killed $vf1 +# CHECK-NEXT: $x14 = MFVSRD $vf0 +# CHECK-NEXT: $x15 = MFVSRLD killed $v0 ...