[PowerPC] Spill callee-saved GPRs into volatile VSX registers.

Callee-saved G8RC registers are assigned to unused volatile VSRs instead of
stack slots. With P9 vector support two GPRs share one VSR: they are saved
with MTVSRDD and restored with MFVSRD/MFVSRLD. Otherwise each GPR gets its
own VSR and is saved with MTVSRD and restored with MFVSRD.

Index: llvm/lib/Target/PowerPC/PPCFrameLowering.h
===================================================================
--- llvm/lib/Target/PowerPC/PPCFrameLowering.h
+++ llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -28,6 +28,10 @@
   const unsigned LinkageSize;
   const unsigned BasePointerSaveOffset;
 
+  // Used to record the mapping of VSRs to spilled callee saved GPRs,
+  // where one VSR can be used to save several GPRs.
+  mutable std::map<unsigned, SmallVector<unsigned, 2>> VSRToGPRs;
+
   /**
    * Find register[s] that can be used in function prologue and epilogue
    *
Index: llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -2116,30 +2116,43 @@
     BVCalleeSaved.set(CSRegs[i]);
 
   for (unsigned Reg : BVAllocatable.set_bits()) {
-    // Set to 0 if the register is not a volatile VF/F8 register, or if it is
+    // Set to 0 if the register is not a volatile VSX register, or if it is
     // used in the function.
-    if (BVCalleeSaved[Reg] ||
-        (!PPC::F8RCRegClass.contains(Reg) &&
-         !PPC::VFRCRegClass.contains(Reg)) ||
-        (MF.getRegInfo().isPhysRegUsed(Reg)))
+    if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) ||
+        MF.getRegInfo().isPhysRegUsed(Reg))
       BVAllocatable.reset(Reg);
   }
 
   bool AllSpilledToReg = true;
+  unsigned LastSpilledVSR = 0;
+
   for (auto &CS : CSI) {
     if (BVAllocatable.none())
       return false;
 
     unsigned Reg = CS.getReg();
-    if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
+    // For simplicity, only G8RC registers are spilled to VSRs.
+    if (!PPC::G8RCRegClass.contains(Reg)) {
       AllSpilledToReg = false;
       continue;
     }
 
+    // For P9, we can reuse LastSpilledVSR to spill two GPRs
+    // into one VSR using the mtvsrdd instruction.
+    if (Subtarget.hasP9Vector() && LastSpilledVSR != 0) {
+      CS.setDstReg(LastSpilledVSR);
+      BVAllocatable.reset(LastSpilledVSR);
+      LastSpilledVSR = 0;
+      continue;
+    }
+
     unsigned VolatileVFReg = BVAllocatable.find_first();
     if (VolatileVFReg < BVAllocatable.size()) {
       CS.setDstReg(VolatileVFReg);
-      BVAllocatable.reset(VolatileVFReg);
+      if (Subtarget.hasP9Vector())
+        LastSpilledVSR = VolatileVFReg;
+      else
+        BVAllocatable.reset(VolatileVFReg);
     } else {
       AllSpilledToReg = false;
     }
@@ -2147,7 +2160,6 @@
   return AllSpilledToReg;
 }
 
-
 bool
 PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator MI,
@@ -2164,6 +2176,22 @@
   DebugLoc DL;
   bool CRSpilled = false;
   MachineInstrBuilder CRMIB;
+  BitVector HasSpilledBV(TRI->getNumRegs());
+
+  VSRToGPRs.clear();
+
+  // Populate the map of the destination VSR
+  // and their corresponding spilled GPRs.
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    if (!CSI[i].isSpilledToReg())
+      continue;
+
+    unsigned Dst = CSI[i].getDstReg();
+    // operator[] creates an empty entry the first time a VSR is seen.
+    auto &SpilledGPRs = VSRToGPRs[Dst];
+
+    SpilledGPRs.push_back(CSI[i].getReg());
+  }
 
   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
     unsigned Reg = CSI[i].getReg();
@@ -2214,9 +2242,32 @@
       }
     } else {
       if (CSI[i].isSpilledToReg()) {
-        NumPESpillVSR++;
-        BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
-            .addReg(Reg, getKillRegState(true));
+        // Each VSR is written once, even when it holds two GPRs.
+        unsigned Dst = CSI[i].getDstReg();
+
+        if (HasSpilledBV[Dst])
+          continue;
+
+        if (VSRToGPRs[Dst].size() == 2) {
+          assert(Subtarget.hasP9Vector());
+
+          NumPESpillVSR += 2;
+          BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRDD), Dst)
+              .addReg(VSRToGPRs[Dst][0], getKillRegState(true))
+              .addReg(VSRToGPRs[Dst][1], getKillRegState(true));
+        } else if (VSRToGPRs[Dst].size() == 1) {
+          assert(Subtarget.hasP8Vector());
+
+          NumPESpillVSR++;
+          BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD),
+                  TRI->getSubReg(Dst, PPC::sub_64))
+              .addReg(VSRToGPRs[Dst][0], getKillRegState(true));
+        } else {
+          llvm_unreachable("Unexpected number of GPRs spilled to a VSR");
+        }
+
+        HasSpilledBV.set(Dst);
+
       } else {
         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
         // Use !IsLiveIn for the kill flag.
@@ -2331,6 +2382,8 @@
   if (!AtStart)
     --BeforeI;
 
+  BitVector HasRestoredBV(TRI->getNumRegs());
+
   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
     unsigned Reg = CSI[i].getReg();
 
@@ -2366,9 +2419,29 @@
 
       if (CSI[i].isSpilledToReg()) {
         DebugLoc DL;
-        NumPEReloadVSR++;
-        BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
-            .addReg(CSI[i].getDstReg(), getKillRegState(true));
+        unsigned Dst = CSI[i].getDstReg();
+
+        if (HasRestoredBV[Dst])
+          continue;
+
+        if (VSRToGPRs[Dst].size() == 2) {
+          assert(Subtarget.hasP9Vector());
+          NumPEReloadVSR += 2;
+          BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), VSRToGPRs[Dst][0])
+              .addReg(TRI->getSubReg(Dst, PPC::sub_64));
+          BuildMI(MBB, I, DL, TII.get(PPC::MFVSRLD), VSRToGPRs[Dst][1])
+              .addReg(Dst, getKillRegState(true));
+        } else if (VSRToGPRs[Dst].size() == 1) {
+          assert(Subtarget.hasP8Vector());
+          NumPEReloadVSR++;
+          BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), VSRToGPRs[Dst][0])
+              .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
+        } else {
+          llvm_unreachable("Unexpected number of GPRs spilled to a VSR");
+        }
+
+        HasRestoredBV.set(Dst);
+
       } else {
        // Default behavior for non-CR saves.
         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
Index: llvm/test/CodeGen/MIR/PowerPC/prolog_vec_spills.mir
===================================================================
--- llvm/test/CodeGen/MIR/PowerPC/prolog_vec_spills.mir
+++ llvm/test/CodeGen/MIR/PowerPC/prolog_vec_spills.mir
@@ -16,12 +16,12 @@
 
 # CHECK-LABEL: name: test1BB
 # CHECK: body: |
-# CHECK: $f1 = MTVSRD killed $x14
-# CHECK-NEXT: $f2 = MTVSRD killed $x15
-# CHECK-NEXT: $f3 = MTVSRD killed $x16
-# CHECK: $x16 = MFVSRD killed $f3
-# CHECK-NEXT: $x15 = MFVSRD killed $f2
-# CHECK-NEXT: $x14 = MFVSRD killed $f1
+# CHECK: $v0 = MTVSRDD killed $x14, killed $x15
+# CHECK-NEXT: $vf1 = MTVSRD killed $x16
+# CHECK: $x16 = MFVSRD killed $vf1
+# CHECK-NEXT: $x14 = MFVSRD $vf0
+# CHECK-NEXT: $x15 = MFVSRLD killed $v0
+
 ...
 ---
 
@@ -53,10 +53,9 @@
 
 # CHECK-LABEL: name: test2BB
 # CHECK: body: |
-# CHECK: $f0 = MTVSRD killed $x14
-# CHECK-NEXT: $f1 = MTVSRD killed $x15
-# CHECK-NEXT: $f2 = MTVSRD killed $x16
-# CHECK: $x16 = MFVSRD killed $f2
-# CHECK-NEXT: $x15 = MFVSRD killed $f1
-# CHECK-NEXT: $x14 = MFVSRD killed $f0
+# CHECK: $v0 = MTVSRDD killed $x14, killed $x15
+# CHECK-NEXT: $vf1 = MTVSRD killed $x16
+# CHECK: $x16 = MFVSRD killed $vf1
+# CHECK-NEXT: $x14 = MFVSRD $vf0
+# CHECK-NEXT: $x15 = MFVSRLD killed $v0
 ...