Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -540,7 +540,9 @@ MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, - RegScavenger &RS) { + RegScavenger &RS, + Register ImpDefSuperReg = Register(), + Register ImpUseSuperReg = Register()) { const SIRegisterInfo &RI = TII.getRegisterInfo(); assert(AMDGPU::SReg_32RegClass.contains(SrcReg) || @@ -571,8 +573,17 @@ DefOp.setIsKill(false); } - BuildMI(MBB, MI, DL, TII.get(AMDGPU::V_ACCVGPR_WRITE_B32), DestReg) + MachineInstrBuilder Builder = + BuildMI(MBB, MI, DL, TII.get(AMDGPU::V_ACCVGPR_WRITE_B32), DestReg) .add(DefOp); + if (ImpDefSuperReg) + Builder.addReg(ImpDefSuperReg, RegState::Define | RegState::Implicit); + + if (ImpUseSuperReg) { + Builder.addReg(ImpUseSuperReg, + getKillRegState(KillSrc) | RegState::Implicit); + } + return; } @@ -602,9 +613,27 @@ RS.setRegUsed(Tmp); } - TII.copyPhysReg(MBB, MI, DL, Tmp, SrcReg, KillSrc); - BuildMI(MBB, MI, DL, TII.get(AMDGPU::V_ACCVGPR_WRITE_B32), DestReg) + // Insert copy to temporary VGPR. 
+ unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32; + if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) { + TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32; + } else { + assert(AMDGPU::SReg_32RegClass.contains(SrcReg)); + } + + MachineInstrBuilder UseBuilder = BuildMI(MBB, MI, DL, TII.get(TmpCopyOp), Tmp) + .addReg(SrcReg, getKillRegState(KillSrc)); + if (ImpUseSuperReg) { + UseBuilder.addReg(ImpUseSuperReg, + getKillRegState(KillSrc) | RegState::Implicit); + } + + MachineInstrBuilder DefBuilder + = BuildMI(MBB, MI, DL, TII.get(AMDGPU::V_ACCVGPR_WRITE_B32), DestReg) .addReg(Tmp, RegState::Kill); + + if (ImpDefSuperReg) + DefBuilder.addReg(ImpDefSuperReg, RegState::Define | RegState::Implicit); } void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, @@ -825,11 +854,21 @@ } } else if (RI.hasAGPRs(RC)) { Opcode = RI.hasVGPRs(RI.getPhysRegClass(SrcReg)) ? - AMDGPU::V_ACCVGPR_WRITE_B32 : AMDGPU::COPY; + AMDGPU::V_ACCVGPR_WRITE_B32 : AMDGPU::INSTRUCTION_LIST_END; } else if (RI.hasVGPRs(RC) && RI.hasAGPRs(RI.getPhysRegClass(SrcReg))) { Opcode = AMDGPU::V_ACCVGPR_READ_B32; } + // For the cases where we need an intermediate instruction/temporary register + // (the result is an AGPR, and the source is either an SGPR or AGPR), we need + // a scavenger. + // + // FIXME: The pass should maintain this for us so we don't have to re-scan the + // whole block for every handled copy. 
+ std::unique_ptr<RegScavenger> RS; + if (Opcode == AMDGPU::INSTRUCTION_LIST_END) + RS.reset(new RegScavenger()); + ArrayRef SubIndices = RI.getRegSplitParts(RC, EltSize); bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg); @@ -840,22 +879,24 @@ else SubIdx = SubIndices[SubIndices.size() - Idx - 1]; - if (Opcode == TargetOpcode::COPY) { - copyPhysReg(MBB, MI, DL, RI.getSubReg(DestReg, SubIdx), - RI.getSubReg(SrcReg, SubIdx), KillSrc); - continue; - } - - MachineInstrBuilder Builder = BuildMI(MBB, MI, DL, - get(Opcode), RI.getSubReg(DestReg, SubIdx)); - Builder.addReg(RI.getSubReg(SrcReg, SubIdx)); + bool UseKill = KillSrc && Idx == SubIndices.size() - 1; - if (Idx == 0) - Builder.addReg(DestReg, RegState::Define | RegState::Implicit); + if (Opcode == AMDGPU::INSTRUCTION_LIST_END) { + Register ImpDefSuper = Idx == 0 ? Register(DestReg) : Register(); + Register ImpUseSuper = SrcReg; + indirectCopyToAGPR(*this, MBB, MI, DL, RI.getSubReg(DestReg, SubIdx), + RI.getSubReg(SrcReg, SubIdx), UseKill, *RS, + ImpDefSuper, ImpUseSuper); + } else { + MachineInstrBuilder Builder = + BuildMI(MBB, MI, DL, get(Opcode), RI.getSubReg(DestReg, SubIdx)) + .addReg(RI.getSubReg(SrcReg, SubIdx)); + if (Idx == 0) + Builder.addReg(DestReg, RegState::Define | RegState::Implicit); - bool UseKill = KillSrc && Idx == SubIndices.size() - 1; - Builder.addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit); + Builder.addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit); + } } } Index: llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir +++ llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir @@ -144,9 +144,9 @@ liveins: $sgpr0_sgpr1 ; GCN-LABEL: name: s2_to_a2 ; GCN: liveins: $sgpr0_sgpr1 - ; GCN: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec - ; GCN: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec - ; GCN: $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec + ; GCN: 
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1 + ; GCN: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1 + ; GCN: $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1 ; GCN: $agpr1 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec, implicit $exec ; GCN: S_ENDPGM 0, implicit $agpr0_agpr1 $agpr0_agpr1 = COPY killed $sgpr0_sgpr1, implicit $exec @@ -175,9 +175,9 @@ bb.0: ; GCN-LABEL: name: a2_to_a2 ; GCN: $agpr0_agpr1 = IMPLICIT_DEF - ; GCN: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec - ; GCN: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec - ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec + ; GCN: $vgpr2 = V_ACCVGPR_READ_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1 + ; GCN: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec, implicit-def $agpr1_agpr2 + ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1 ; GCN: $agpr1 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec, implicit $exec ; GCN: S_ENDPGM 0, implicit $agpr1_agpr2 $agpr0_agpr1 = IMPLICIT_DEF @@ -205,3 +205,139 @@ $agpr0 = COPY killed $agpr1, implicit $exec S_ENDPGM 0, implicit $agpr0 ... 
+ +--- +name: copy_sgpr_to_agpr_tuple +tracksRegLiveness: true +body: | + bb.0: + liveins: $agpr0, $sgpr2_sgpr3 + + ; GCN-LABEL: name: copy_sgpr_to_agpr_tuple + ; GCN: liveins: $agpr0, $sgpr2_sgpr3 + ; GCN: S_NOP 0, implicit-def dead $sgpr0_sgpr1 + ; GCN: $vgpr1 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $agpr7 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7 + ; GCN: $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $agpr6 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + ; GCN: $vgpr2 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $agpr5 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec + ; GCN: $vgpr1 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $agpr4 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec, implicit $exec + ; GCN: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + S_NOP 0, implicit-def dead $sgpr0_sgpr1 + renamable $agpr4_agpr5_agpr6_agpr7 = COPY renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec + S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3 +... 
+--- +name: copy_sgpr_to_agpr_tuple_kill +tracksRegLiveness: true +body: | + bb.0: + liveins: $agpr0, $sgpr2_sgpr3 + + ; GCN-LABEL: name: copy_sgpr_to_agpr_tuple_kill + ; GCN: liveins: $agpr0, $sgpr2_sgpr3 + ; GCN: S_NOP 0, implicit-def dead $sgpr0_sgpr1 + ; GCN: $vgpr1 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $agpr7 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7 + ; GCN: $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $agpr6 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + ; GCN: $vgpr2 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $agpr5 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec + ; GCN: $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $agpr4 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec, implicit $exec + ; GCN: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7 + S_NOP 0, implicit-def dead $sgpr0_sgpr1 + renamable $agpr4_agpr5_agpr6_agpr7 = COPY renamable killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec + S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7 +... 
+ +--- +name: copy_agpr_to_agpr_tuple +tracksRegLiveness: true +body: | + bb.0: + liveins: $agpr0, $agpr2_agpr3 + + ; GCN-LABEL: name: copy_agpr_to_agpr_tuple + ; GCN: liveins: $agpr0, $agpr2_agpr3 + ; GCN: S_NOP 0, implicit-def dead $agpr0_agpr1 + ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN: $agpr7 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7 + ; GCN: $vgpr0 = V_ACCVGPR_READ_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN: $agpr6 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + ; GCN: $vgpr2 = V_ACCVGPR_READ_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN: $agpr5 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec + ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN: $agpr4 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec, implicit $exec + ; GCN: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3 + S_NOP 0, implicit-def dead $agpr0_agpr1 + renamable $agpr4_agpr5_agpr6_agpr7 = COPY renamable $agpr0_agpr1_agpr2_agpr3, implicit $exec + S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3 +... 
+ +--- +name: copy_agpr_to_agpr_tuple_kill +tracksRegLiveness: true +body: | + bb.0: + liveins: $agpr0, $agpr2_agpr3 + + ; GCN-LABEL: name: copy_agpr_to_agpr_tuple_kill + ; GCN: liveins: $agpr0, $agpr2_agpr3 + ; GCN: S_NOP 0, implicit-def dead $agpr0_agpr1 + ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN: $agpr7 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7 + ; GCN: $vgpr0 = V_ACCVGPR_READ_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN: $agpr6 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + ; GCN: $vgpr2 = V_ACCVGPR_READ_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN: $agpr5 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec + ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 + ; GCN: $agpr4 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec, implicit $exec + ; GCN: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7 + S_NOP 0, implicit-def dead $agpr0_agpr1 + renamable $agpr4_agpr5_agpr6_agpr7 = COPY renamable killed $agpr0_agpr1_agpr2_agpr3, implicit $exec + S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7 +... 
+ +--- +name: a4_to_a4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $agpr0_agpr1_agpr2_agpr3 + ; GCN-LABEL: name: a4_to_a4 + ; GCN: liveins: $agpr0_agpr1_agpr2_agpr3 + ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN: $agpr7 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7 + ; GCN: $vgpr0 = V_ACCVGPR_READ_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN: $agpr6 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + ; GCN: $vgpr2 = V_ACCVGPR_READ_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN: $agpr5 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec + ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN: $agpr4 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec, implicit $exec + ; GCN: S_ENDPGM 0, implicit $agpr0, implicit $agpr1, implicit $agpr2, implicit $agpr3, implicit $agpr4, implicit $agpr5 + $agpr4_agpr5_agpr6_agpr7 = COPY $agpr0_agpr1_agpr2_agpr3, implicit $exec + S_ENDPGM 0, implicit $agpr0, implicit $agpr1, implicit $agpr2, implicit $agpr3, implicit $agpr4, implicit $agpr5 +... 
+ +--- +name: a4_to_a4_overlap +tracksRegLiveness: true +body: | + bb.0: + liveins: $agpr0_agpr1_agpr2_agpr3 + ; GCN-LABEL: name: a4_to_a4_overlap + ; GCN: liveins: $agpr0_agpr1_agpr2_agpr3 + ; GCN: $vgpr2 = V_ACCVGPR_READ_B32 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN: $agpr5 = V_ACCVGPR_WRITE_B32 $vgpr2, implicit $exec, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GCN: $agpr4 = V_ACCVGPR_WRITE_B32 $vgpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN: $vgpr0 = V_ACCVGPR_READ_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN: $agpr3 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + ; GCN: $vgpr3 = V_ACCVGPR_READ_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr3, implicit $exec, implicit $exec + ; GCN: S_ENDPGM 0, implicit $agpr0, implicit $agpr1, implicit $agpr2, implicit $agpr3, implicit $agpr4, implicit $agpr5 + $agpr2_agpr3_agpr4_agpr5 = COPY $agpr0_agpr1_agpr2_agpr3, implicit $exec + S_ENDPGM 0, implicit $agpr0, implicit $agpr1, implicit $agpr2, implicit $agpr3, implicit $agpr4, implicit $agpr5 +...