Index: llvm/include/llvm/CodeGen/MachineInstr.h
===================================================================
--- llvm/include/llvm/CodeGen/MachineInstr.h
+++ llvm/include/llvm/CodeGen/MachineInstr.h
@@ -1120,6 +1120,10 @@
     return isCopy() && !getOperand(0).getSubReg() && !getOperand(1).getSubReg();
   }
 
+  bool isTiedCopy() const {
+    return getOpcode() == TargetOpcode::TIED_COPY;
+  }
+
   bool isExtractSubreg() const {
     return getOpcode() == TargetOpcode::EXTRACT_SUBREG;
   }
@@ -1136,6 +1140,11 @@
            getOperand(0).getSubReg() == getOperand(1).getSubReg();
   }
 
+  bool isIdentityTiedCopy() const {
+    return isTiedCopy() && getOperand(0).getReg() == getOperand(1).getReg() &&
+           getOperand(0).getSubReg() == getOperand(1).getSubReg();
+  }
+
   /// Return true if this instruction doesn't produce any output in the form of
   /// executable instructions.
   bool isMetaInstruction() const {
Index: llvm/include/llvm/Support/TargetOpcodes.def
===================================================================
--- llvm/include/llvm/Support/TargetOpcodes.def
+++ llvm/include/llvm/Support/TargetOpcodes.def
@@ -97,6 +97,9 @@
 /// used to copy between subregisters of virtual registers.
HANDLE_TARGET_OPCODE(COPY)
 
+/// TIED_COPY - Target-independent register copy with a tied constraint.
+HANDLE_TARGET_OPCODE(TIED_COPY)
+
 /// BUNDLE - This instruction represents an instruction bundle. Instructions
 /// which immediately follow a BUNDLE instruction which are marked with
 /// 'InsideBundle' flag are inside the bundle.
Index: llvm/include/llvm/Target/Target.td
===================================================================
--- llvm/include/llvm/Target/Target.td
+++ llvm/include/llvm/Target/Target.td
@@ -1099,6 +1099,20 @@
   let isAsCheapAsAMove = 1;
   let hasNoSchedulingInfo = 0;
 }
+
+// A copy with the requirement that the source and result registers
+// must be allocated to the same register. This can be used for
+// constraining a value to a single register inside a loop in SSA
+// form.
+def TIED_COPY : StandardPseudoInstruction {
+  let OutOperandList = (outs unknown:$dst);
+  let InOperandList = (ins unknown:$src);
+  let Constraints = "$dst = $src";
+  let AsmString = "";
+  let hasSideEffects = 0;
+  let isAsCheapAsAMove = 1;
+}
+
 def BUNDLE : StandardPseudoInstruction {
   let OutOperandList = (outs);
   let InOperandList = (ins variable_ops);
Index: llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
===================================================================
--- llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -212,6 +212,7 @@
         MadeChange |= LowerSubregToReg(&MI);
         break;
       case TargetOpcode::COPY:
+      case TargetOpcode::TIED_COPY:
         MadeChange |= LowerCopy(&MI);
         break;
       case TargetOpcode::DBG_VALUE:
Index: llvm/lib/CodeGen/VirtRegMap.cpp
===================================================================
--- llvm/lib/CodeGen/VirtRegMap.cpp
+++ llvm/lib/CodeGen/VirtRegMap.cpp
@@ -372,7 +372,7 @@
 }
 
 void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const {
-  if (!MI.isIdentityCopy())
+  if (!MI.isIdentityCopy() && !MI.isIdentityTiedCopy())
     return;
   LLVM_DEBUG(dbgs() << "Identity copy: " << MI);
   ++NumIdCopies;
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3438,6 +3438,7 @@
   MachineBasicBlock::iterator I(&MI);
 
   Register PhiReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+  Register TmpDstReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
   Register InitReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 
   BuildMI(MBB, I, DL, TII->get(TargetOpcode::IMPLICIT_DEF), InitReg);
@@ -3447,17 +3448,20 @@
   MachineBasicBlock *LoopBB = InsPt->getParent();
 
   if (UseGPRIdxMode) {
-    BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::V_MOV_B32_e32), Dst)
+    BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpDstReg)
         .addReg(SrcReg, RegState::Undef, SubReg)
         .addReg(SrcReg, RegState::Implicit)
         .addReg(AMDGPU::M0, RegState::Implicit);
     BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
   } else {
-    BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
+    BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), TmpDstReg)
         .addReg(SrcReg, RegState::Undef, SubReg)
         .addReg(SrcReg, RegState::Implicit);
   }
 
+  BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::TIED_COPY), Dst)
+      .addReg(TmpDstReg);
+
   MI.eraseFromParent();
 
   return LoopBB;
@@ -3525,18 +3529,22 @@
   const DebugLoc &DL = MI.getDebugLoc();
 
   Register PhiReg = MRI.createVirtualRegister(VecRC);
+  Register TmpDstReg = MRI.createVirtualRegister(VecRC);
 
   auto InsPt = loadM0FromVGPR(TII, MBB, MI, SrcVec->getReg(), PhiReg, Offset,
                               UseGPRIdxMode, false);
   MachineBasicBlock *LoopBB = InsPt->getParent();
 
-  BuildMI(*LoopBB, InsPt, DL, MovRelDesc, Dst)
+  BuildMI(*LoopBB, InsPt, DL, MovRelDesc, TmpDstReg)
      .addReg(PhiReg)
      .add(*Val)
      .addImm(AMDGPU::sub0);
   if (UseGPRIdxMode)
     BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
 
+  BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::TIED_COPY), Dst)
+      .addReg(TmpDstReg);
+
   MI.eraseFromParent();
   return LoopBB;
 }
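
Note (not part of the patch): because TIED_COPY ties $dst to $src, the register
allocator is expected to put both operands in the same physical register, at
which point the instruction becomes an identity copy and the small
VirtRegMap.cpp change above deletes it. The sketch below is a minimal,
hypothetical illustration of how MI-level code could use the new
MachineInstr::isIdentityTiedCopy() predicate for the same kind of cleanup; the
function name eraseIdentityTiedCopies is invented here, and the real
VirtRegRewriter::handleIdentityCopy additionally preserves implicit operands,
which this sketch ignores.

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineInstr.h"

  using namespace llvm;

  // Erase TIED_COPYs that register allocation has turned into no-op
  // identity copies (same register and subregister on both operands).
  static bool eraseIdentityTiedCopies(MachineFunction &MF) {
    bool Changed = false;
    for (MachineBasicBlock &MBB : MF) {
      // make_early_inc_range lets us erase instructions while iterating.
      for (MachineInstr &MI : make_early_inc_range(MBB)) {
        if (MI.isIdentityTiedCopy()) {
          MI.eraseFromParent();
          Changed = true;
        }
      }
    }
    return Changed;
  }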