Index: include/llvm/Target/TargetRegisterInfo.h
===================================================================
--- include/llvm/Target/TargetRegisterInfo.h
+++ include/llvm/Target/TargetRegisterInfo.h
@@ -40,6 +40,7 @@
 class MachineInstr;
 class RegScavenger;
 class VirtRegMap;
+class LiveIntervals;
 
 class TargetRegisterClass {
 public:
@@ -959,7 +960,8 @@
                               unsigned SubReg,
                               const TargetRegisterClass *DstRC,
                               unsigned DstSubReg,
-                              const TargetRegisterClass *NewRC) const
+                              const TargetRegisterClass *NewRC,
+                              LiveIntervals &LIS) const
   { return true; }
 
   //===--------------------------------------------------------------------===//
Index: lib/CodeGen/RegisterCoalescer.cpp
===================================================================
--- lib/CodeGen/RegisterCoalescer.cpp
+++ lib/CodeGen/RegisterCoalescer.cpp
@@ -1583,7 +1583,7 @@
       std::swap(SrcRC, DstRC);
     }
     if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx,
-                            CP.getNewRC())) {
+                            CP.getNewRC(), *LIS)) {
       DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n");
       return false;
     }
Index: lib/Target/AMDGPU/SIRegisterInfo.h
===================================================================
--- lib/Target/AMDGPU/SIRegisterInfo.h
+++ lib/Target/AMDGPU/SIRegisterInfo.h
@@ -22,6 +22,7 @@
 
 namespace llvm {
 
+class LiveIntervals;
 class MachineRegisterInfo;
 class SISubtarget;
 class SIMachineFunctionInfo;
@@ -212,7 +213,8 @@
                       unsigned SubReg,
                       const TargetRegisterClass *DstRC,
                       unsigned DstSubReg,
-                      const TargetRegisterClass *NewRC) const override;
+                      const TargetRegisterClass *NewRC,
+                      LiveIntervals &LIS) const override;
 
   unsigned getRegPressureLimit(const TargetRegisterClass *RC,
                                MachineFunction &MF) const override;
Index: lib/Target/AMDGPU/SIRegisterInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1474,7 +1474,8 @@
                                     unsigned SubReg,
                                     const TargetRegisterClass *DstRC,
                                     unsigned DstSubReg,
-                                    const TargetRegisterClass *NewRC) const {
+                                    const TargetRegisterClass *NewRC,
+                                    LiveIntervals &LIS) const {
   unsigned SrcSize = getRegSizeInBits(*SrcRC);
   unsigned DstSize = getRegSizeInBits(*DstRC);
   unsigned NewSize = getRegSizeInBits(*NewRC);
Index: lib/Target/ARM/ARMBaseRegisterInfo.h
===================================================================
--- lib/Target/ARM/ARMBaseRegisterInfo.h
+++ lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -27,6 +27,8 @@
 
 namespace llvm {
 
+class LiveIntervals;
+
 /// Register allocation hints.
 namespace ARMRI {
 
@@ -204,7 +206,8 @@
                       unsigned SubReg,
                       const TargetRegisterClass *DstRC,
                       unsigned DstSubReg,
-                      const TargetRegisterClass *NewRC) const override;
+                      const TargetRegisterClass *NewRC,
+                      LiveIntervals &LIS) const override;
 };
 
 } // end namespace llvm
Index: lib/Target/ARM/ARMBaseRegisterInfo.cpp
===================================================================
--- lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -807,7 +807,8 @@
                                   unsigned SubReg,
                                   const TargetRegisterClass *DstRC,
                                   unsigned DstSubReg,
-                                  const TargetRegisterClass *NewRC) const {
+                                  const TargetRegisterClass *NewRC,
+                                  LiveIntervals &LIS) const {
   auto MBB = MI->getParent();
   auto MF = MBB->getParent();
   const MachineRegisterInfo &MRI = MF->getRegInfo();
Index: lib/Target/SystemZ/SystemZRegisterInfo.h
===================================================================
--- lib/Target/SystemZ/SystemZRegisterInfo.h
+++ lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -18,6 +18,8 @@
 
 namespace llvm {
 
+class LiveIntervals;
+
 namespace SystemZ {
 // Return the subreg to use for referring to the even and odd registers
 // in a GR128 pair.  Is32Bit says whether we want a GR32 or GR64.
@@ -59,6 +61,16 @@
   void eliminateFrameIndex(MachineBasicBlock::iterator MI,
                            int SPAdj, unsigned FIOperandNum,
                            RegScavenger *RS) const override;
+
+  /// \brief SrcRC and DstRC will be morphed into NewRC if this returns true.
+  bool shouldCoalesce(MachineInstr *MI,
+                      const TargetRegisterClass *SrcRC,
+                      unsigned SubReg,
+                      const TargetRegisterClass *DstRC,
+                      unsigned DstSubReg,
+                      const TargetRegisterClass *NewRC,
+                      LiveIntervals &LIS) const override;
+
   unsigned getFrameRegister(const MachineFunction &MF) const override;
 };
 
Index: lib/Target/SystemZ/SystemZRegisterInfo.cpp
===================================================================
--- lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -10,6 +10,7 @@
 #include "SystemZRegisterInfo.h"
 #include "SystemZInstrInfo.h"
 #include "SystemZSubtarget.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Target/TargetFrameLowering.h"
@@ -152,6 +153,98 @@
     MI->getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
 }
 
+/// Decide whether the COPY \p MI should be coalesced.
+///
+/// Every COPY is accepted unless the coalesced class \p NewRC is GR128 (or a
+/// subclass of it) and \p SrcRC or \p DstRC is at most 64 bits wide. Such a
+/// GR128 subreg COPY is only accepted when both virtual registers begin and
+/// end inside the COPY's basic block and the region between them leaves a
+/// margin of registers in \p NewRC that are not touched by physical
+/// register operands. \p LIS supplies the live intervals used for this.
+bool SystemZRegisterInfo::shouldCoalesce(MachineInstr *MI,
+                                  const TargetRegisterClass *SrcRC,
+                                  unsigned SubReg,
+                                  const TargetRegisterClass *DstRC,
+                                  unsigned DstSubReg,
+                                  const TargetRegisterClass *NewRC,
+                                  LiveIntervals &LIS) const {
+  assert (MI->isCopy() && "Only expecting COPY instructions");
+
+  // Coalesce anything which is not a COPY involving a subreg to/from GR128.
+  if (!(NewRC->hasSuperClassEq(&SystemZ::GR128BitRegClass) &&
+        (getRegSizeInBits(*SrcRC) <= 64 || getRegSizeInBits(*DstRC) <= 64)))
+    return true;
+
+  // Allow coalescing of a GR128 subreg COPY only if the live ranges are small
+  // and local to one MBB with not too many interfering registers. Otherwise
+  // regalloc may run out of registers.
+
+  unsigned WideOpNo = (getRegSizeInBits(*SrcRC) == 128 ? 1 : 0);
+  unsigned GR128Reg = MI->getOperand(WideOpNo).getReg();
+  unsigned GRNarReg = MI->getOperand((WideOpNo == 1) ? 0 : 1).getReg();
+  LiveInterval &IntGR128 = LIS.getInterval(GR128Reg);
+  LiveInterval &IntGRNar = LIS.getInterval(GRNarReg);
+
+  // Cheap early exit: neither register may be live into or out of MBB.
+  MachineBasicBlock *MBB = MI->getParent();
+  if (LIS.isLiveInToMBB(IntGR128, MBB) || LIS.isLiveOutOfMBB(IntGR128, MBB) ||
+      LIS.isLiveInToMBB(IntGRNar, MBB) || LIS.isLiveOutOfMBB(IntGRNar, MBB))
+    return false;
+
+  // The check above only inspects the boundaries of MBB. Additionally require
+  // that each interval begins and ends at an instruction inside MBB, so that
+  // the scan below never leaves the block, and bail out if an index does not
+  // map to an instruction rather than relying on an assert that is compiled
+  // out of release builds.
+  MachineInstr *FirstMI_GR128 =
+    LIS.getInstructionFromIndex(IntGR128.beginIndex());
+  MachineInstr *FirstMI_GRNar =
+    LIS.getInstructionFromIndex(IntGRNar.beginIndex());
+  MachineInstr *LastMI_GR128 = LIS.getInstructionFromIndex(IntGR128.endIndex());
+  MachineInstr *LastMI_GRNar = LIS.getInstructionFromIndex(IntGRNar.endIndex());
+  if ((!FirstMI_GR128 || FirstMI_GR128->getParent() != MBB) ||
+      (!FirstMI_GRNar || FirstMI_GRNar->getParent() != MBB) ||
+      (!LastMI_GR128 || LastMI_GR128->getParent() != MBB) ||
+      (!LastMI_GRNar || LastMI_GRNar->getParent() != MBB))
+    return false;
+
+  // Find the first and last MIs of the registers.
+  MachineInstr *FirstMI = nullptr, *LastMI = nullptr;
+  if (WideOpNo == 1) {
+    FirstMI = FirstMI_GR128;
+    LastMI = LastMI_GRNar;
+  } else {
+    FirstMI = FirstMI_GRNar;
+    LastMI = LastMI_GR128;
+  }
+
+  // Check if coalescing seems safe by finding the set of clobbered physreg
+  // pairs in the region.
+  BitVector PhysClobbered(getNumRegs());
+  MachineBasicBlock::iterator MII = FirstMI, MEE = LastMI;
+  MEE++;
+  for (; MII != MEE; ++MII) {
+    for (const MachineOperand &MO : MII->operands())
+      if (MO.isReg() && isPhysicalRegister(MO.getReg())) {
+        for (MCSuperRegIterator SI(MO.getReg(), this, true/*IncludeSelf*/);
+             SI.isValid(); ++SI)
+          if (NewRC->contains(*SI)) {
+            PhysClobbered.set(*SI);
+            break;
+          }
+      }
+  }
+
+  // Demand an arbitrary margin of free regs. The test is written as an
+  // addition so it cannot wrap around when NewRC has fewer registers than
+  // the margin.
+  unsigned const DemandedFreeGR128 = 3;
+  if (PhysClobbered.count() + DemandedFreeGR128 > NewRC->getNumRegs())
+    return false;
+
+  return true;
+}
+
 unsigned
 SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   const SystemZFrameLowering *TFI = getFrameLowering(MF);
Index: test/CodeGen/SystemZ/regalloc-GR128.ll
===================================================================
--- /dev/null
+++ test/CodeGen/SystemZ/regalloc-GR128.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -O3 -o /dev/null
+;
+; Test that regalloc does not run out of registers
+
+; This test will include a GR128 virtual reg.
+define void @test0(i64 %dividend, i64 %divisor) {
+  %rem = urem i64 %dividend, %divisor
+  call void asm sideeffect "", "{r0},{r1},{r2},{r3},{r4},{r5},{r6},{r7},{r8},{r9},{r10},{r11},{r12},{r13},{r14}"(i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 %rem)
+  ret void
+}
+
+; This test will include an ADDR128 virtual reg.
+define i64 @test1(i64 %dividend, i64 %divisor) {
+  %rem = urem i64 %dividend, %divisor
+  call void asm sideeffect "", "{r2},{r3},{r4},{r5},{r6},{r7},{r8},{r9},{r10},{r11},{r12},{r13},{r14}"(i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 %rem)
+  %ret = add i64 %rem, 1
+  ret i64 %ret
+}