diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/llvm/lib/Target/PowerPC/PPCFrameLowering.h --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.h +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.h @@ -28,6 +28,10 @@ const unsigned BasePointerSaveOffset; const unsigned CRSaveOffset; + // Map each VSR to the group of one or two GPRs spilled into it. + // TODO: Use local table in methods to avoid this mutable member. + mutable DenseMap> VSRToGPRs; + /** * Find register[s] that can be used in function prologue and epilogue * diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -2248,30 +2248,39 @@ BVCalleeSaved.set(CSRegs[i]); for (unsigned Reg : BVAllocatable.set_bits()) { - // Set to 0 if the register is not a volatile VF/F8 register, or if it is + // Set to 0 if the register is not a volatile VSX register, or if it is // used in the function. - if (BVCalleeSaved[Reg] || - (!PPC::F8RCRegClass.contains(Reg) && - !PPC::VFRCRegClass.contains(Reg)) || - (MF.getRegInfo().isPhysRegUsed(Reg))) + if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) || + MF.getRegInfo().isPhysRegUsed(Reg)) BVAllocatable.reset(Reg); } bool AllSpilledToReg = true; + unsigned LastSpilledVSR = 0; for (auto &CS : CSI) { if (BVAllocatable.none()) return false; unsigned Reg = CS.getReg(); - if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) { + + if (!PPC::G8RCRegClass.contains(Reg)) { AllSpilledToReg = false; continue; } + // For P9, we can reuse LastSpilledVSR to spill two GPRs + // into one VSR using the mtvsrdd instruction.
+ if (LastSpilledVSR != 0) { + CS.setDstReg(LastSpilledVSR); + BVAllocatable.reset(LastSpilledVSR); + LastSpilledVSR = 0; + continue; + } + unsigned VolatileVFReg = BVAllocatable.find_first(); if (VolatileVFReg < BVAllocatable.size()) { CS.setDstReg(VolatileVFReg); - BVAllocatable.reset(VolatileVFReg); + LastSpilledVSR = VolatileVFReg; } else { AllSpilledToReg = false; } @@ -2290,6 +2299,18 @@ DebugLoc DL; bool CRSpilled = false; MachineInstrBuilder CRMIB; + BitVector Spilled(TRI->getNumRegs()); + + VSRToGPRs.clear(); + + // Map each VSR to the GPRs that are spilled into it. A single VSR can hold + // one or two GPRs, so we need a table to record this for later save/restore. + llvm::for_each(CSI, [&](const CalleeSavedInfo &Info) { + if (Info.isSpilledToReg()) { + VSRToGPRs.FindAndConstruct(Info.getDstReg()) + .second.push_back(Info.getReg()); + } + }); for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); @@ -2339,9 +2360,29 @@ } } else { if (CSI[i].isSpilledToReg()) { - NumPESpillVSR++; - BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg()) - .addReg(Reg, getKillRegState(true)); + unsigned Dst = CSI[i].getDstReg(); + + if (Spilled[Dst]) + continue; + + if (VSRToGPRs[Dst].size() == 2) { + assert(Subtarget.hasP9Vector()); + + NumPESpillVSR += 2; + BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRDD), Dst) + .addReg(VSRToGPRs[Dst][0], getKillRegState(true)) + .addReg(VSRToGPRs[Dst][1], getKillRegState(true)); + } else if (VSRToGPRs[Dst].size() == 1) { + assert(Subtarget.hasP8Vector()); + + ++NumPESpillVSR; + BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), + TRI->getSubReg(Dst, PPC::sub_64)) + .addReg(VSRToGPRs[Dst][0], getKillRegState(true)); + } else { + llvm_unreachable("More than two GPRs spilled to a VSR!"); + } + Spilled.set(Dst); } else { const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); // Use !IsLiveIn for the kill flag.
@@ -2445,6 +2486,7 @@ bool CR3Spilled = false; bool CR4Spilled = false; unsigned CSIIndex = 0; + BitVector Restored(TRI->getNumRegs()); // Initialize insertion-point logic; we will be restoring in reverse // order of spill. @@ -2489,9 +2531,29 @@ if (CSI[i].isSpilledToReg()) { DebugLoc DL; - NumPEReloadVSR++; - BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg) - .addReg(CSI[i].getDstReg(), getKillRegState(true)); + unsigned Dst = CSI[i].getDstReg(); + + if (Restored[Dst]) + continue; + + if (VSRToGPRs[Dst].size() == 2) { + assert(Subtarget.hasP9Vector()); + NumPEReloadVSR += 2; + BuildMI(MBB, I, DL, TII.get(PPC::MFVSRLD), VSRToGPRs[Dst][1]) + .addReg(Dst); + BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), VSRToGPRs[Dst][0]) + .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true)); + } else if (VSRToGPRs[Dst].size() == 1) { + assert(Subtarget.hasP8Vector()); + ++NumPEReloadVSR; + BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), VSRToGPRs[Dst][0]) + .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true)); + } else { + llvm_unreachable("More than two GPRs spilled to a VSR!"); + } + + Restored.set(Dst); + } else { // Default behavior for non-CR saves. 
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); diff --git a/llvm/test/CodeGen/PowerPC/prolog_vec_spills.mir b/llvm/test/CodeGen/PowerPC/prolog_vec_spills.mir --- a/llvm/test/CodeGen/PowerPC/prolog_vec_spills.mir +++ b/llvm/test/CodeGen/PowerPC/prolog_vec_spills.mir @@ -14,15 +14,15 @@ $v20 = IMPLICIT_DEF BLR8 implicit undef $lr8, implicit undef $rm +# Use mtvsrdd to save two GPRs in a single instruction # CHECK-LABEL: name: test1BB # CHECK: body: | # CHECK: liveins: $x14, $x15, $x16, $v20 -# CHECK: $f1 = MTVSRD killed $x14 -# CHECK-NEXT: $f2 = MTVSRD killed $x15 -# CHECK-NEXT: $f3 = MTVSRD killed $x16 -# CHECK: $x16 = MFVSRD killed $f3 -# CHECK-NEXT: $x15 = MFVSRD killed $f2 -# CHECK-NEXT: $x14 = MFVSRD killed $f1 +# CHECK: $v0 = MTVSRDD killed $x14, killed $x15 +# CHECK-NEXT: $vf1 = MTVSRD killed $x16 +# CHECK: $x16 = MFVSRD killed $vf1 +# CHECK-NEXT: $x15 = MFVSRLD $v0 +# CHECK-NEXT: $x14 = MFVSRD killed $vf0 ... --- @@ -55,15 +55,14 @@ ## clobbered before restored in the epilogue. # CHECK-LABEL: name: test2BB # CHECK: body: | -# CHECK: $f0 = MTVSRD killed $x14 -# CHECK-NEXT: $f1 = MTVSRD killed $x15 -# CHECK-NEXT: $f2 = MTVSRD killed $x16 +# CHECK: $v0 = MTVSRDD killed $x14, killed $x15 +# CHECK-NEXT: $vf1 = MTVSRD killed $x16 # CHECK: bb.2: # CHECK-NEXT: successors: %bb.3 -# CHECK-NEXT: liveins: $f0, $f1, $f2 +# CHECK-NEXT: liveins: $v0, $v1 # CHECK: bb.3: -# CHECK-NEXT: liveins: $f0, $f1, $f2 -# CHECK: $x16 = MFVSRD killed $f2 -# CHECK-NEXT: $x15 = MFVSRD killed $f1 -# CHECK-NEXT: $x14 = MFVSRD killed $f0 +# CHECK-NEXT: liveins: $v0, $v1 +# CHECK: $x16 = MFVSRD killed $vf1 +# CHECK-NEXT: $x15 = MFVSRLD $v0 +# CHECK-NEXT: $x14 = MFVSRD killed $vf0 ...