Index: lib/Target/AMDGPU/SIInstrInfo.h
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.h
+++ lib/Target/AMDGPU/SIInstrInfo.h
@@ -493,6 +493,12 @@
 
   void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
 
+  void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
+                              MachineBasicBlock::iterator I,
+                              const TargetRegisterClass *DstRC,
+                              MachineOperand &Op, MachineRegisterInfo &MRI,
+                              const DebugLoc &DL) const;
+
   /// \brief Legalize all operands in this instruction. This function may
   /// create new instruction and insert them before \p MI.
   void legalizeOperands(MachineInstr &MI) const;
Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2281,6 +2281,39 @@
   }
 }
 
+void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
+                                         MachineBasicBlock::iterator I,
+                                         const TargetRegisterClass *DstRC,
+                                         MachineOperand &Op,
+                                         MachineRegisterInfo &MRI,
+                                         const DebugLoc &DL) const {
+
+  unsigned OpReg = Op.getReg();
+  unsigned OpSubReg = Op.getSubReg();
+
+  const TargetRegisterClass *OpRC = RI.getSubClassWithSubReg(
+      RI.getRegClassForReg(MRI, OpReg), OpSubReg);
+
+  // Nothing to do if the operand already has the requested register class.
+  if (DstRC == OpRC)
+    return;
+
+  unsigned DstReg = MRI.createVirtualRegister(DstRC);
+  MachineInstr *Copy = BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg)
+                           .addOperand(Op);
+
+  Op.setReg(DstReg);
+  Op.setSubReg(0);
+
+  MachineInstr *Def = MRI.getVRegDef(OpReg);
+  if (!Def)
+    return;
+
+  // Try to eliminate the copy if it is copying an immediate value.
+  if (Def->isMoveImmediate())
+    FoldImmediate(*Copy, *Def, OpReg, &MRI);
+}
+
 void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
   MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
 
@@ -2338,15 +2371,14 @@
       MachineOperand &Op = MI.getOperand(I);
       if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
         continue;
-      unsigned DstReg = MRI.createVirtualRegister(RC);
 
       // MI is a PHI instruction.
       MachineBasicBlock *InsertBB = MI.getOperand(I + 1).getMBB();
       MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator();
 
-      BuildMI(*InsertBB, Insert, MI.getDebugLoc(), get(AMDGPU::COPY), DstReg)
-          .addOperand(Op);
-      Op.setReg(DstReg);
+      // Avoid creating no-op copies with the same src and dst reg class. These
+      // confuse some of the machine passes.
+      legalizeGenericOperand(*InsertBB, Insert, RC, Op, MRI, MI.getDebugLoc());
     }
   }
 
@@ -2370,12 +2402,7 @@
       if (VRC == OpRC)
         continue;
 
-      unsigned DstReg = MRI.createVirtualRegister(VRC);
-
-      BuildMI(*MBB, MI, MI.getDebugLoc(), get(AMDGPU::COPY), DstReg)
-          .addOperand(Op);
-
-      Op.setReg(DstReg);
+      legalizeGenericOperand(*MBB, MI, VRC, Op, MRI, MI.getDebugLoc());
       Op.setIsKill();
     }
   }
@@ -2391,11 +2418,9 @@
     const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
     const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
     if (DstRC != Src0RC) {
-      MachineBasicBlock &MBB = *MI.getParent();
-      unsigned NewSrc0 = MRI.createVirtualRegister(DstRC);
-      BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::COPY), NewSrc0)
-          .addReg(Src0);
-      MI.getOperand(1).setReg(NewSrc0);
+      MachineBasicBlock *MBB = MI.getParent();
+      MachineOperand &Op = MI.getOperand(1);
+      legalizeGenericOperand(*MBB, MI, DstRC, Op, MRI, MI.getDebugLoc());
     }
     return;
   }
@@ -2735,6 +2760,21 @@
         continue;
 
       unsigned DstReg = Inst.getOperand(0).getReg();
+      if (Inst.isCopy() &&
+          TargetRegisterInfo::isVirtualRegister(Inst.getOperand(1).getReg()) &&
+          NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) {
+        // Instead of creating a copy where src and dst are the same register
+        // class, we just replace all uses of dst with src. These kinds of
+        // copies interfere with the heuristics MachineSink uses to decide
+        // whether or not to split a critical edge, because that pass assumes
+        // that copies will end up as machine instructions and not be
+        // eliminated.
+        addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
+        MRI.replaceRegWith(DstReg, Inst.getOperand(1).getReg());
+        Inst.getOperand(0).setReg(DstReg);
+        continue;
+      }
+
       NewDstReg = MRI.createVirtualRegister(NewDstRC);
       MRI.replaceRegWith(DstReg, NewDstReg);
     }
Index: lib/Target/AMDGPU/SIRegisterInfo.h
===================================================================
--- lib/Target/AMDGPU/SIRegisterInfo.h
+++ lib/Target/AMDGPU/SIRegisterInfo.h
@@ -187,6 +187,8 @@
   unsigned getSGPRPressureSet() const { return SGPRSetID; };
   unsigned getVGPRPressureSet() const { return VGPRSetID; };
 
+  const TargetRegisterClass *getRegClassForReg(const MachineRegisterInfo &MRI,
+                                               unsigned Reg) const;
   bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const;
 
   bool isSGPRPressureSet(unsigned SetID) const {
Index: lib/Target/AMDGPU/SIRegisterInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -993,13 +993,16 @@
   }
 }
 
-bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
-                            unsigned Reg) const {
-  const TargetRegisterClass *RC;
+const TargetRegisterClass*
+SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
+                                  unsigned Reg) const {
   if (TargetRegisterInfo::isVirtualRegister(Reg))
-    RC = MRI.getRegClass(Reg);
-  else
-    RC = getPhysRegClass(Reg);
+    return MRI.getRegClass(Reg);
+
+  return getPhysRegClass(Reg);
+}
 
-  return hasVGPRs(RC);
+bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
+                            unsigned Reg) const {
+  return hasVGPRs(getRegClassForReg(MRI, Reg));
 }