diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -637,6 +637,74 @@
   DefBuilder.addReg(ImpDefSuperReg, RegState::Define | RegState::Implicit);
 }
 
+/// Expand a copy from the SGPR tuple \p SrcReg to the SGPR tuple \p DestReg
+/// into S_MOV_B32 / S_MOV_B64 instructions inserted before \p MI.
+///
+/// The tuple is walked one 32-bit part of \p RC at a time. When the destination
+/// and source parts both sit at an even offset from SGPR0 and another part
+/// follows, the pair is copied with a single S_MOV_B64.
+///
+/// Every move carries an implicit use of \p SrcReg. The move that comes first
+/// in program order also carries an implicit def of \p DestReg, and when
+/// \p KillSrc is set the move that comes last marks \p SrcReg as killed.
+///
+/// \param Forward  if true the moves are emitted lowest part first; otherwise
+///                 each move is inserted ahead of the previous one so that the
+///                 highest part is copied first.
+static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MI, const DebugLoc &DL,
+                           MCRegister DestReg, MCRegister SrcReg, bool KillSrc,
+                           const TargetRegisterClass *RC, bool Forward) {
+  const SIRegisterInfo &RI = TII.getRegisterInfo();
+  ArrayRef<int16_t> BaseIndices = RI.getRegSplitParts(RC, 4);
+  MachineBasicBlock::iterator I = MI;
+  MachineInstr *FirstMI = nullptr, *LastMI = nullptr;
+
+  for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
+    int16_t SubIdx = BaseIndices[Idx];
+    Register Reg = RI.getSubReg(DestReg, SubIdx);
+    unsigned Opcode = AMDGPU::S_MOV_B32;
+
+    // Is SGPR aligned? If so try to combine with next.
+    // NOTE(review): the parity test assumes both registers are plain SGPRn
+    // numbered contiguously from SGPR0 - confirm for other SGPR-class members.
+    Register Src = RI.getSubReg(SrcReg, SubIdx);
+    bool AlignedDest = ((Reg - AMDGPU::SGPR0) % 2) == 0;
+    bool AlignedSrc = ((Src - AMDGPU::SGPR0) % 2) == 0;
+    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {
+      // Can use SGPR64 copy
+      unsigned Channel = RI.getChannelFromSubReg(SubIdx);
+      SubIdx = RI.getSubRegFromChannel(Channel, 2);
+      Opcode = AMDGPU::S_MOV_B64;
+      ++Idx; // The next 32-bit part is covered by this 64-bit move.
+    }
+
+    LastMI = BuildMI(MBB, I, DL, TII.get(Opcode), RI.getSubReg(DestReg, SubIdx))
+                 .addReg(RI.getSubReg(SrcReg, SubIdx))
+                 .addReg(SrcReg, RegState::Implicit);
+
+    if (!FirstMI)
+      FirstMI = LastMI;
+
+    // Step back onto the move just built so that the next one is inserted
+    // ahead of it.
+    if (!Forward)
+      --I;
+  }
+
+  assert(FirstMI && LastMI);
+  // The pointers were recorded in creation order; for a backward copy that is
+  // the reverse of program order.
+  if (!Forward)
+    std::swap(FirstMI, LastMI);
+
+  FirstMI->addOperand(
+      MachineOperand::CreateReg(DestReg, true /*IsDef*/, true /*IsImp*/));
+
+  if (KillSrc)
+    LastMI->addRegisterKilled(SrcReg, &RI);
+}
+
 void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MI,
                               const DebugLoc &DL, MCRegister DestReg,
@@ -842,23 +910,18 @@
     return;
   }
 
-  unsigned EltSize = 4;
-  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
+  const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
   if (RI.isSGPRClass(RC)) {
-    // TODO: Copy vec3/vec5 with s_mov_b64s then final s_mov_b32.
-    if (!(RI.getRegSizeInBits(*RC) % 64)) {
-      Opcode = AMDGPU::S_MOV_B64;
-      EltSize = 8;
-    } else {
-      Opcode = AMDGPU::S_MOV_B32;
-      EltSize = 4;
-    }
-
     if (!RI.isSGPRClass(RI.getPhysRegClass(SrcReg))) {
       reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
       return;
     }
-  } else if (RI.hasAGPRs(RC)) {
+    expandSGPRCopy(*this, MBB, MI, DL, DestReg, SrcReg, KillSrc, RC, Forward);
+    return;
+  }
+
+  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
+  if (RI.hasAGPRs(RC)) {
     Opcode = RI.hasVGPRs(RI.getPhysRegClass(SrcReg)) ?
       AMDGPU::V_ACCVGPR_WRITE_B32 : AMDGPU::INSTRUCTION_LIST_END;
   } else if (RI.hasVGPRs(RC) && RI.hasAGPRs(RI.getPhysRegClass(SrcReg))) {
@@ -866,8 +929,7 @@
   }
 
   // For the cases where we need an intermediate instruction/temporary register
-  // (the result is an SGPR, and the source is either an SGPR or AGPR), we need
-  // a scavenger.
+  // (destination is an AGPR), we need a scavenger.
   //
   // FIXME: The pass should maintain this for us so we don't have to re-scan the
   // whole block for every handled copy.
@@ -875,8 +937,7 @@
   if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
     RS.reset(new RegScavenger());
 
-  ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, EltSize);
-  bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
+  ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, 4);
 
   for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
     unsigned SubIdx;
@@ -885,7 +946,6 @@
     else
       SubIdx = SubIndices[SubIndices.size() - Idx - 1];
 
-
     bool UseKill = KillSrc && Idx == SubIndices.size() - 1;
 
     if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-phys-copy.mir b/llvm/test/CodeGen/AMDGPU/sgpr-phys-copy.mir
--- a/llvm/test/CodeGen/AMDGPU/sgpr-phys-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-phys-copy.mir
@@ -47,9 +47,8 @@
   bb.0:
     liveins: $sgpr0_sgpr1_sgpr2
     ; GFX9-LABEL: name: sgpr96_aligned_src_dst
-    ; GFX9: $sgpr8 = S_MOV_B32 $sgpr2, implicit-def $sgpr6_sgpr7_sgpr8, implicit $sgpr0_sgpr1_sgpr2
-    ; GFX9: $sgpr7 = S_MOV_B32 $sgpr1, implicit $sgpr0_sgpr1_sgpr2
-    ; GFX9: $sgpr6 = S_MOV_B32 $sgpr0, implicit $sgpr0_sgpr1_sgpr2
+    ; GFX9: $sgpr8 = S_MOV_B32 $sgpr2, implicit $sgpr0_sgpr1_sgpr2, implicit-def $sgpr6_sgpr7_sgpr8
+    ; GFX9: $sgpr6_sgpr7 = S_MOV_B64 $sgpr0_sgpr1, implicit $sgpr0_sgpr1_sgpr2
     $sgpr6_sgpr7_sgpr8 = COPY $sgpr0_sgpr1_sgpr2
 ...
 
@@ -59,7 +58,7 @@ bb.0: liveins: $sgpr0_sgpr1_sgpr2 ; GFX9-LABEL: name: sgpr96_aligned_src - ; GFX9: $sgpr5 = S_MOV_B32 $sgpr2, implicit-def $sgpr3_sgpr4_sgpr5, implicit $sgpr0_sgpr1_sgpr2 + ; GFX9: $sgpr5 = S_MOV_B32 $sgpr2, implicit $sgpr0_sgpr1_sgpr2, implicit-def $sgpr3_sgpr4_sgpr5 ; GFX9: $sgpr4 = S_MOV_B32 $sgpr1, implicit $sgpr0_sgpr1_sgpr2 ; GFX9: $sgpr3 = S_MOV_B32 $sgpr0, implicit $sgpr0_sgpr1_sgpr2 $sgpr3_sgpr4_sgpr5 = COPY $sgpr0_sgpr1_sgpr2 @@ -71,7 +70,7 @@ bb.0: liveins: $sgpr3_sgpr4_sgpr5 ; GFX9-LABEL: name: sgpr96_aligned_dst - ; GFX9: $sgpr0 = S_MOV_B32 $sgpr3, implicit-def $sgpr0_sgpr1_sgpr2, implicit $sgpr3_sgpr4_sgpr5 + ; GFX9: $sgpr0 = S_MOV_B32 $sgpr3, implicit $sgpr3_sgpr4_sgpr5, implicit-def $sgpr0_sgpr1_sgpr2 ; GFX9: $sgpr1 = S_MOV_B32 $sgpr4, implicit $sgpr3_sgpr4_sgpr5 ; GFX9: $sgpr2 = S_MOV_B32 $sgpr5, implicit $sgpr3_sgpr4_sgpr5 $sgpr0_sgpr1_sgpr2 = COPY $sgpr3_sgpr4_sgpr5 @@ -83,8 +82,7 @@ bb.0: liveins: $sgpr3_sgpr4_sgpr5 ; GFX9-LABEL: name: sgpr96_unaligned_src_dst - ; GFX9: $sgpr11 = S_MOV_B32 $sgpr5, implicit-def $sgpr9_sgpr10_sgpr11, implicit $sgpr3_sgpr4_sgpr5 - ; GFX9: $sgpr10 = S_MOV_B32 $sgpr4, implicit $sgpr3_sgpr4_sgpr5 + ; GFX9: $sgpr10_sgpr11 = S_MOV_B64 $sgpr4_sgpr5, implicit $sgpr3_sgpr4_sgpr5, implicit-def $sgpr9_sgpr10_sgpr11 ; GFX9: $sgpr9 = S_MOV_B32 $sgpr3, implicit $sgpr3_sgpr4_sgpr5 $sgpr9_sgpr10_sgpr11 = COPY $sgpr3_sgpr4_sgpr5 ... @@ -95,8 +93,7 @@ bb.0: liveins: $sgpr3_sgpr4_sgpr5 ; GFX9-LABEL: name: sgpr96_killed - ; GFX9: $sgpr11 = S_MOV_B32 $sgpr5, implicit-def $sgpr9_sgpr10_sgpr11, implicit $sgpr3_sgpr4_sgpr5 - ; GFX9: $sgpr10 = S_MOV_B32 $sgpr4, implicit $sgpr3_sgpr4_sgpr5 + ; GFX9: $sgpr10_sgpr11 = S_MOV_B64 $sgpr4_sgpr5, implicit $sgpr3_sgpr4_sgpr5, implicit-def $sgpr9_sgpr10_sgpr11 ; GFX9: $sgpr9 = S_MOV_B32 $sgpr3, implicit killed $sgpr3_sgpr4_sgpr5 $sgpr9_sgpr10_sgpr11 = COPY killed $sgpr3_sgpr4_sgpr5 ... 
@@ -107,7 +104,7 @@ bb.0: liveins: $sgpr4_sgpr5_sgpr6_sgpr7 ; GFX9-LABEL: name: sgpr128_forward - ; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $sgpr4_sgpr5, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $sgpr4_sgpr5, implicit $sgpr4_sgpr5_sgpr6_sgpr7, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9: $sgpr2_sgpr3 = S_MOV_B64 $sgpr6_sgpr7, implicit $sgpr4_sgpr5_sgpr6_sgpr7 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY $sgpr4_sgpr5_sgpr6_sgpr7 ... @@ -118,7 +115,7 @@ bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9-LABEL: name: sgpr128_backward - ; GFX9: $sgpr6_sgpr7 = S_MOV_B64 $sgpr2_sgpr3, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX9: $sgpr6_sgpr7 = S_MOV_B64 $sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7 ; GFX9: $sgpr4_sgpr5 = S_MOV_B64 $sgpr0_sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 $sgpr4_sgpr5_sgpr6_sgpr7 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ... @@ -129,7 +126,7 @@ bb.0: liveins: $sgpr4_sgpr5_sgpr6_sgpr7 ; GFX9-LABEL: name: sgpr128_killed - ; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $sgpr4_sgpr5, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $sgpr4_sgpr5, implicit $sgpr4_sgpr5_sgpr6_sgpr7, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9: $sgpr2_sgpr3 = S_MOV_B64 $sgpr6_sgpr7, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed $sgpr4_sgpr5_sgpr6_sgpr7 ... 
@@ -140,10 +137,8 @@ bb.0: liveins: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 ; GFX9-LABEL: name: sgpr160_forward - ; GFX9: $sgpr0 = S_MOV_B32 $sgpr8, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 - ; GFX9: $sgpr1 = S_MOV_B32 $sgpr9, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 - ; GFX9: $sgpr2 = S_MOV_B32 $sgpr10, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 - ; GFX9: $sgpr3 = S_MOV_B32 $sgpr11, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 + ; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $sgpr8_sgpr9, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 + ; GFX9: $sgpr2_sgpr3 = S_MOV_B64 $sgpr10_sgpr11, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 ; GFX9: $sgpr4 = S_MOV_B32 $sgpr12, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 ... @@ -154,11 +149,9 @@ bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 ; GFX9-LABEL: name: sgpr160_backward - ; GFX9: $sgpr12 = S_MOV_B32 $sgpr4, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 - ; GFX9: $sgpr11 = S_MOV_B32 $sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 - ; GFX9: $sgpr10 = S_MOV_B32 $sgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 - ; GFX9: $sgpr9 = S_MOV_B32 $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 - ; GFX9: $sgpr8 = S_MOV_B32 $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 + ; GFX9: $sgpr12 = S_MOV_B32 $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 + ; GFX9: $sgpr10_sgpr11 = S_MOV_B64 $sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 + ; GFX9: $sgpr8_sgpr9 = S_MOV_B64 $sgpr0_sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 ... 
@@ -168,11 +161,9 @@ bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 ; GFX9-LABEL: name: sgpr160_killed - ; GFX9: $sgpr12 = S_MOV_B32 $sgpr4, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 - ; GFX9: $sgpr11 = S_MOV_B32 $sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 - ; GFX9: $sgpr10 = S_MOV_B32 $sgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 - ; GFX9: $sgpr9 = S_MOV_B32 $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 - ; GFX9: $sgpr8 = S_MOV_B32 $sgpr0, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 + ; GFX9: $sgpr12 = S_MOV_B32 $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 + ; GFX9: $sgpr10_sgpr11 = S_MOV_B64 $sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 + ; GFX9: $sgpr8_sgpr9 = S_MOV_B64 $sgpr0_sgpr1, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 ... @@ -183,7 +174,7 @@ bb.0: liveins: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 ; GFX9-LABEL: name: sgpr192_forward - ; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $sgpr8_sgpr9, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 + ; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $sgpr8_sgpr9, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 ; GFX9: $sgpr2_sgpr3 = S_MOV_B64 $sgpr10_sgpr11, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 ; GFX9: $sgpr4_sgpr5 = S_MOV_B64 $sgpr12_sgpr13, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 @@ -195,7 +186,7 @@ bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 ; GFX9-LABEL: name: sgpr192_backward - ; GFX9: $sgpr12_sgpr13 = S_MOV_B64 $sgpr4_sgpr5, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 + ; GFX9: $sgpr12_sgpr13 = S_MOV_B64 $sgpr4_sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5, 
implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 ; GFX9: $sgpr10_sgpr11 = S_MOV_B64 $sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 ; GFX9: $sgpr8_sgpr9 = S_MOV_B64 $sgpr0_sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 @@ -207,7 +198,7 @@ bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 ; GFX9-LABEL: name: sgpr192_killed - ; GFX9: $sgpr12_sgpr13 = S_MOV_B64 $sgpr4_sgpr5, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 + ; GFX9: $sgpr12_sgpr13 = S_MOV_B64 $sgpr4_sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 ; GFX9: $sgpr10_sgpr11 = S_MOV_B64 $sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 ; GFX9: $sgpr8_sgpr9 = S_MOV_B64 $sgpr0_sgpr1, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 @@ -219,7 +210,7 @@ bb.0: liveins: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX9-LABEL: name: sgpr256_forward - ; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $sgpr8_sgpr9, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $sgpr8_sgpr9, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX9: $sgpr2_sgpr3 = S_MOV_B64 $sgpr10_sgpr11, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX9: $sgpr4_sgpr5 = S_MOV_B64 $sgpr12_sgpr13, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX9: $sgpr6_sgpr7 = S_MOV_B64 $sgpr14_sgpr15, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 @@ -232,7 +223,7 @@ bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX9-LABEL: name: sgpr256_backward - ; GFX9: $sgpr14_sgpr15 = S_MOV_B64 
$sgpr6_sgpr7, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX9: $sgpr14_sgpr15 = S_MOV_B64 $sgpr6_sgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX9: $sgpr12_sgpr13 = S_MOV_B64 $sgpr4_sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX9: $sgpr10_sgpr11 = S_MOV_B64 $sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX9: $sgpr8_sgpr9 = S_MOV_B64 $sgpr0_sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 @@ -245,7 +236,7 @@ bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX9-LABEL: name: sgpr256_killed - ; GFX9: $sgpr14_sgpr15 = S_MOV_B64 $sgpr6_sgpr7, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX9: $sgpr14_sgpr15 = S_MOV_B64 $sgpr6_sgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX9: $sgpr12_sgpr13 = S_MOV_B64 $sgpr4_sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX9: $sgpr10_sgpr11 = S_MOV_B64 $sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX9: $sgpr8_sgpr9 = S_MOV_B64 $sgpr0_sgpr1, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 @@ -258,7 +249,7 @@ bb.0: liveins: $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 ; GFX9-LABEL: name: sgpr512_forward - ; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $sgpr16_sgpr17, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $sgpr16_sgpr17, implicit 
$sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX9: $sgpr2_sgpr3 = S_MOV_B64 $sgpr18_sgpr19, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 ; GFX9: $sgpr4_sgpr5 = S_MOV_B64 $sgpr20_sgpr21, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 ; GFX9: $sgpr6_sgpr7 = S_MOV_B64 $sgpr22_sgpr23, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 @@ -275,7 +266,7 @@ bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX9-LABEL: name: sgpr512_backward - ; GFX9: $sgpr30_sgpr31 = S_MOV_B64 $sgpr14_sgpr15, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX9: $sgpr30_sgpr31 = S_MOV_B64 $sgpr14_sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 ; GFX9: $sgpr28_sgpr29 = S_MOV_B64 $sgpr12_sgpr13, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX9: $sgpr26_sgpr27 = S_MOV_B64 $sgpr10_sgpr11, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX9: $sgpr24_sgpr25 = S_MOV_B64 $sgpr8_sgpr9, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 @@ -292,7 
+283,7 @@ bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX9-LABEL: name: sgpr512_killed - ; GFX9: $sgpr30_sgpr31 = S_MOV_B64 $sgpr14_sgpr15, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX9: $sgpr30_sgpr31 = S_MOV_B64 $sgpr14_sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 ; GFX9: $sgpr28_sgpr29 = S_MOV_B64 $sgpr12_sgpr13, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX9: $sgpr26_sgpr27 = S_MOV_B64 $sgpr10_sgpr11, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX9: $sgpr24_sgpr25 = S_MOV_B64 $sgpr8_sgpr9, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 @@ -309,7 +300,7 @@ bb.0: liveins: $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63 ; GFX9-LABEL: name: sgpr1024_forward - ; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $sgpr32_sgpr33, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit 
$sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63 + ; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $sgpr32_sgpr33, implicit $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 ; GFX9: $sgpr2_sgpr3 = S_MOV_B64 $sgpr34_sgpr35, implicit $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63 ; GFX9: $sgpr4_sgpr5 = S_MOV_B64 $sgpr36_sgpr37, implicit $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63 ; GFX9: $sgpr6_sgpr7 = S_MOV_B64 $sgpr38_sgpr39, implicit $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63 @@ -334,7 +325,7 @@ bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 ; GFX9-LABEL: name: sgpr1024_backward - ; GFX9: $sgpr62_sgpr63 = S_MOV_B64 $sgpr30_sgpr31, implicit-def 
$sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; GFX9: $sgpr62_sgpr63 = S_MOV_B64 $sgpr30_sgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63 ; GFX9: $sgpr60_sgpr61 = S_MOV_B64 $sgpr28_sgpr29, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 ; GFX9: $sgpr58_sgpr59 = S_MOV_B64 $sgpr26_sgpr27, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 ; GFX9: $sgpr56_sgpr57 = S_MOV_B64 $sgpr24_sgpr25, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 @@ -359,7 +350,7 @@ bb.0: liveins: 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 ; GFX9-LABEL: name: sgpr1024_killed - ; GFX9: $sgpr62_sgpr63 = S_MOV_B64 $sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; GFX9: $sgpr62_sgpr63 = S_MOV_B64 $sgpr30_sgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63 ; GFX9: $sgpr60_sgpr61 = S_MOV_B64 $sgpr28_sgpr29, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 ; GFX9: $sgpr58_sgpr59 = S_MOV_B64 $sgpr26_sgpr27, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 ; GFX9: $sgpr56_sgpr57 = S_MOV_B64 $sgpr24_sgpr25, implicit 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31