diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -614,7 +614,8 @@
   // Registers in the sequence are allocated contiguously so we can just
   // use register number to pick one of three round-robin temps.
   unsigned RegNo = DestReg % 3;
-  Register Tmp = AMDGPU::VGPR32;
+  Register Tmp =
+      MBB.getParent()->getInfo<SIMachineFunctionInfo>()->getVGPRForAGPRCopy();
   assert(MBB.getParent()->getRegInfo().isReserved(Tmp) &&
          "VGPR used for an intermediate copy should have been reserved.");
 
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -493,6 +493,21 @@
   // frame, so save it here and add it to the RegScavenger later.
   Optional<int> ScavengeFI;
 
+private:
+  /// Scratch VGPR used as the intermediate when copying between AGPRs. Only
+  /// assigned for subtargets that have MAI instructions but not GFX90A
+  /// instructions.
+  Register VGPRForAGPRCopy;
+
+public:
+  /// Returns the scratch VGPR for AGPR copies; asserts that one was assigned,
+  /// so only call this on subtargets where it is set up.
+  Register getVGPRForAGPRCopy() const {
+    assert(VGPRForAGPRCopy &&
+           "Valid VGPR for AGPR copy must have been identified by now");
+    return VGPRForAGPRCopy;
+  }
+
 public: // FIXME
   /// If this is set, an SGPR used for save/restore of the register used for the
   /// frame pointer.
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -187,6 +187,12 @@
   S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
   if (!S.empty())
     S.consumeInteger(0, GDSSize);
+
+  // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
+  // VGPR available at all times.
+  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
+    VGPRForAGPRCopy = AMDGPU::VGPR_32RegClass.getRegister(32);
+  }
 }
 
 void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -698,7 +698,7 @@
   // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
   // VGPR available at all times.
   if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
-    reserveRegisterTuples(Reserved, AMDGPU::VGPR32);
+    reserveRegisterTuples(Reserved, MFI->getVGPRForAGPRCopy());
   }
 
   for (auto Reg : MFI->WWMReservedRegs) {
@@ -1553,8 +1553,8 @@
       assert(EltSize == 4);
 
       if (!TmpIntermediateVGPR) {
-        assert(MF->getRegInfo().isReserved(AMDGPU::VGPR32));
-        TmpIntermediateVGPR = AMDGPU::VGPR32;
+        TmpIntermediateVGPR = FuncInfo->getVGPRForAGPRCopy();
+        assert(MF->getRegInfo().isReserved(TmpIntermediateVGPR));
       }
       if (IsStore) {
         auto AccRead = BuildMI(MBB, MI, DL,