Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -582,12 +582,11 @@ // Registers in the sequence are allocated contiguously so we can just // use register number to pick one of three round-robin temps. unsigned RegNo = DestReg % 3; - Register Tmp = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0); - if (!Tmp) - report_fatal_error("Cannot scavenge VGPR to copy to AGPR"); - RS.setRegUsed(Tmp); - + Register Tmp; if (!TII.getSubtarget().hasGFX90AInsts()) { + Tmp = AMDGPU::VGPR32; + assert(MBB.getParent()->getRegInfo().isReserved(AMDGPU::VGPR32)); + // Only loop through if there are any free registers left, otherwise // scavenger may report a fatal error without emergency spill slot // or spill with the slot. @@ -598,6 +597,8 @@ Tmp = Tmp2; RS.setRegUsed(Tmp); } + } else { + Tmp = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0); } // Insert copy to temporary VGPR. Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -600,6 +600,10 @@ } else MaxNumAGPRs = 0; } + } else if (ST.hasMAIInsts() && MFI->usesAGPRs(MF)) { + // In order to guarantee copying between AGPRs, we need a scratch VGPR + // available at all times. + reserveRegisterTuples(Reserved, AMDGPU::VGPR32); } for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) { Index: llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir +++ llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir @@ -408,8 +408,8 @@ ; GFX908-LABEL: name: s_to_a ; GFX908: liveins: $sgpr0 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec - ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec + ; GFX908-NEXT: $vgpr32 = V_MOV_B32_e32 killed $sgpr0, implicit $exec + ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, implicit $agpr0 ; GFX90A-LABEL: name: s_to_a ; GFX90A: liveins: $sgpr0 @@ -430,10 +430,10 @@ ; GFX908-LABEL: name: s2_to_a2 ; GFX908: liveins: $sgpr0_sgpr1 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1 - ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1 - ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1 - ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $exec + ; GFX908-NEXT: $vgpr32 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1 + ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec, implicit-def $agpr0_agpr1 + ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1 + ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1 ; GFX90A-LABEL: name: s2_to_a2 ; GFX90A: liveins: $sgpr0_sgpr1 @@ -456,12 +456,12 @@ ; GFX908-LABEL: name: s3_to_a3 ; GFX908: liveins: $sgpr0_sgpr1_sgpr2 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2 - ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2 - ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec - ; GFX908-NEXT: $vgpr2 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2 - ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $exec + ; GFX908-NEXT: $vgpr32 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2 + ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec, implicit-def $agpr0_agpr1_agpr2 + ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2 + ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2 + ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2 ; GFX90A-LABEL: name: s3_to_a3 ; GFX90A: liveins: $sgpr0_sgpr1_sgpr2 @@ -486,14 +486,14 @@ ; GFX908-LABEL: name: s4_to_a4 ; GFX908: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec - ; GFX908-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec - ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec + ; GFX908-NEXT: $vgpr32 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; GFX908-NEXT: $vgpr32 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 ; GFX90A-LABEL: name: s4_to_a4 ; GFX90A: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 @@ -520,18 +520,18 @@ ; GFX908-LABEL: name: s6_to_a6 ; GFX908: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 - ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 - ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec - ; GFX908-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 - ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec - ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 - ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 - ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec - ; GFX908-NEXT: $vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 - ; GFX908-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $exec + ; GFX908-NEXT: $vgpr32 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 + ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 + ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 + ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; GFX908-NEXT: $vgpr32 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 + ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 + ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 + ; GFX908-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; GFX90A-LABEL: name: s6_to_a6 ; GFX90A: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 @@ -562,22 +562,22 @@ ; GFX908-LABEL: name: s8_to_a8 ; GFX908: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec - ; GFX908-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec - ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec - ; GFX908-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr5, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX908-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec - ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr6, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX908-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX908-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $exec + ; GFX908-NEXT: $vgpr32 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; GFX908-NEXT: $vgpr32 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr5, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX908-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; GFX908-NEXT: $vgpr32 = V_MOV_B32_e32 $sgpr6, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX908-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX908-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; GFX90A-LABEL: name: s8_to_a8 ; GFX90A: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 @@ -612,38 +612,38 @@ ; GFX908-LABEL: name: s16_to_a16 ; GFX908: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec - ; GFX908-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec - ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec - ; GFX908-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr5, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX908-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec - ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr6, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX908-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr7, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX908-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec - ; GFX908-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX908-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec - ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX908-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr10, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX908-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec - ; GFX908-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr11, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX908-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec - ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr12, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX908-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr13, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX908-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec - ; GFX908-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr14, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX908-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec - ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX908-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec + ; GFX908-NEXT: $vgpr32 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; GFX908-NEXT: $vgpr32 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr5, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX908-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; GFX908-NEXT: $vgpr32 = V_MOV_B32_e32 $sgpr6, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX908-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr7, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX908-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX908-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; GFX908-NEXT: $vgpr32 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX908-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr10, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX908-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr11, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX908-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; GFX908-NEXT: $vgpr32 = V_MOV_B32_e32 $sgpr12, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX908-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr13, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX908-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr14, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX908-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; GFX908-NEXT: $vgpr32 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX908-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; GFX90A-LABEL: name: s16_to_a16 ; GFX90A: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 @@ -692,8 +692,8 @@ bb.0: ; GFX908-LABEL: name: a_to_a ; GFX908: $agpr1 = IMPLICIT_DEF - ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec - ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec + ; GFX908-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, implicit $agpr0 ; GFX90A-LABEL: name: a_to_a ; GFX90A: $agpr1 = IMPLICIT_DEF @@ -713,11 +713,11 @@ ; GFX908-LABEL: name: a2_to_a2_kill ; GFX908: liveins: $agpr0_agpr1 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1 - ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr1_agpr2 - ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1 - ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $exec - ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1 + ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr1_agpr2 + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1 + ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec + ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, implicit $agpr1, implicit $agpr2, implicit $agpr3 ; GFX90A-LABEL: name: a2_to_a2_kill ; GFX90A: liveins: $agpr0_agpr1 @@ -740,12 +740,12 @@ ; GFX908-LABEL: name: a3_to_a3_nonoverlap_kill ; GFX908: liveins: $agpr4_agpr5_agpr6 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $agpr4_agpr5_agpr6 - ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit $agpr4_agpr5_agpr6 - ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec - ; GFX908-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit killed $agpr4_agpr5_agpr6 - ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec + ; GFX908-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $agpr4_agpr5_agpr6 + ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec, implicit-def $agpr0_agpr1_agpr2 + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit $agpr4_agpr5_agpr6 + ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit killed $agpr4_agpr5_agpr6 + ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2 ; GFX90A-LABEL: name: a3_to_a3_nonoverlap_kill ; GFX90A: liveins: $agpr4_agpr5_agpr6 @@ -767,11 +767,11 @@ ; GFX908-LABEL: name: a3_to_a3_overlap_kill ; GFX908: liveins: $agpr1_agpr2_agpr3 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr1_agpr2_agpr3 - ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr1_agpr2_agpr3 - ; GFX908-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr1_agpr2_agpr3 - ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec + ; GFX908-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr32, implicit $exec, implicit-def $agpr0_agpr1_agpr2 + ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr32, implicit $exec, implicit $agpr1_agpr2_agpr3 + ; GFX908-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2, implicit $vgpr1 ; GFX90A-LABEL: name: a3_to_a3_overlap_kill @@ -794,13 +794,13 @@ bb.0: ; GFX908-LABEL: name: a4_to_a4 ; GFX908: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF - ; GFX908-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX908-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr2_agpr3_agpr4_agpr5 - ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX908-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec + ; GFX908-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, implicit $agpr2_agpr3_agpr4_agpr5 ; GFX90A-LABEL: name: a4_to_a4 ; GFX90A: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF @@ -823,13 +823,13 @@ ; GFX908-LABEL: name: a4_to_a4_overlap ; GFX908: liveins: $agpr0_agpr1_agpr2_agpr3 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX908-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr2_agpr3_agpr4_agpr5 - ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX908-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec + ; GFX908-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, implicit $agpr0, implicit $agpr1, implicit $agpr2, implicit $agpr3, implicit $agpr4, implicit $agpr5 ; GFX90A-LABEL: name: a4_to_a4_overlap ; GFX90A: liveins: $agpr0_agpr1_agpr2_agpr3 @@ -850,22 +850,22 @@ bb.0: ; GFX908-LABEL: name: a8_to_a8 ; GFX908: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF - ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX908-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX908-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec - ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX908-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec - ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX908-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX908-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX908-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec - ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX908-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec - ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX908-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX908-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX908-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $exec + ; GFX908-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GFX908-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec, implicit-def $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GFX908-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GFX908-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; GFX908-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GFX908-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GFX908-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GFX908-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; GFX908-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GFX908-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GFX908-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, implicit $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; GFX90A-LABEL: name: a8_to_a8 ; GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF @@ -891,38 +891,38 @@ ; GFX908-LABEL: name: a16_to_a16 ; GFX908: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF - ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX908-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec - ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec - ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX908-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec - ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec - ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX908-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec - ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec - ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX908-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec - ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec - ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX908-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec - ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX908-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; GFX908-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX908-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX908-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX908-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; GFX908-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX908-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX908-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX908-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; GFX908-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX908-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX908-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX908-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; GFX908-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX908-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX908-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX908-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; GFX908-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX908-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX908-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX908-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; GFX90A-LABEL: name: a16_to_a16 ; GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF @@ -961,8 +961,8 @@ ; GFX908: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: $agpr1 = IMPLICIT_DEF - ; GFX908-NEXT: $vgpr255 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec - ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr255, implicit $exec, implicit $exec + ; GFX908-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, implicit $agpr0 ; GFX90A-LABEL: name: a_to_a_spill ; GFX90A: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254 @@ -986,14 +986,14 @@ ; GFX908: liveins: $agpr0, $sgpr2_sgpr3 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: S_NOP 0, implicit-def dead $sgpr0_sgpr1 - ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX908-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7 - ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX908-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX908-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX908-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec - ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX908-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7 + ; GFX908-NEXT: $vgpr32 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX908-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX908-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-LABEL: name: copy_sgpr_to_agpr_tuple ; GFX90A: liveins: $agpr0, $sgpr2_sgpr3 @@ -1023,14 +1023,14 @@ ; GFX908: liveins: $agpr0, $sgpr2_sgpr3 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: S_NOP 0, implicit-def dead $sgpr0_sgpr1 - ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX908-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7 - ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX908-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX908-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX908-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec - ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX908-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7 + ; GFX908-NEXT: $vgpr32 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX908-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX908-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7 ; GFX90A-LABEL: name: copy_sgpr_to_agpr_tuple_kill ; GFX90A: liveins: $agpr0, $sgpr2_sgpr3 @@ -1061,14 +1061,14 @@ ; GFX908: liveins: $agpr0, $agpr2_agpr3 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: S_NOP 0, implicit-def dead $agpr0_agpr1 - ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX908-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7 - ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX908-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX908-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX908-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec - ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7 + ; GFX908-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3 ; GFX90A-LABEL: name: copy_agpr_to_agpr_tuple ; GFX90A: liveins: $agpr0, $agpr2_agpr3 @@ -1095,14 +1095,14 @@ ; GFX908: liveins: $agpr0, $agpr2_agpr3 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: S_NOP 0, implicit-def dead $agpr0_agpr1 - ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX908-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7 - ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX908-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX908-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX908-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec - ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 - ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7 + ; GFX908-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7 ; GFX90A-LABEL: name: copy_agpr_to_agpr_tuple_kill ; GFX90A: liveins: $agpr0, $agpr2_agpr3 Index: llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll @@ -0,0 +1,525 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck -check-prefix=GFX908 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s + +; This testcase would fail due to not having a free VGPR available to +; copy between AGPRs. +define void @no_free_vgprs_at_agpr_copy(float %v0, float %v1) #0 { +; GFX908-LABEL: no_free_vgprs_at_agpr_copy: +; GFX908: ; %bb.0: +; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX908-NEXT: v_mov_b32_e32 v33, v0 +; GFX908-NEXT: v_mov_b32_e32 v34, v1 +; GFX908-NEXT: ;;#ASMSTART +; GFX908-NEXT: ; def v[0:31] a[0:15] +; GFX908-NEXT: ;;#ASMEND +; GFX908-NEXT: v_accvgpr_read_b32 v32, a15 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a31, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a14 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a30, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a13 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a29, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a12 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a28, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a11 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a27, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a10 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a26, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a9 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a25, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a8 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a24, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a7 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a23, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a6 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a22, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a5 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a21, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a4 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a20, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a3 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a19, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a2 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a18, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a1 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a17, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a0 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a16, v32 +; GFX908-NEXT: s_nop 0 +; GFX908-NEXT: v_mfma_f32_16x16x1f32 a[0:15], v33, v34, a[16:31] +; GFX908-NEXT: s_nop 7 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_read_b32 v34, a0 ; Reload Reuse +; GFX908-NEXT: v_accvgpr_read_b32 v39, a11 ; Reload Reuse +; GFX908-NEXT: v_accvgpr_read_b32 v38, a12 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v34, a1 ; Reload Reuse +; GFX908-NEXT: v_accvgpr_read_b32 v37, a13 ; Reload Reuse +; GFX908-NEXT: v_accvgpr_read_b32 v36, a14 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v34, a2 ; Reload Reuse +; GFX908-NEXT: v_accvgpr_read_b32 v35, a15 ; Reload Reuse +; GFX908-NEXT: s_nop 0 +; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v34, a3 ; Reload Reuse +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v34, a4 ; Reload Reuse +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v34, a5 ; Reload Reuse +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v34, a6 ; Reload Reuse +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v34, a7 ; Reload Reuse +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v34, a8 ; Reload Reuse +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v34, a9 ; Reload Reuse +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v34, a10 ; Reload Reuse +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX908-NEXT: ;;#ASMSTART +; GFX908-NEXT: ; copy +; GFX908-NEXT: ;;#ASMEND +; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_read_b32 v32, a1 +; GFX908-NEXT: s_waitcnt vmcnt(0) +; GFX908-NEXT: v_accvgpr_write_b32 a0, v34 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a16, v32 +; GFX908-NEXT: s_waitcnt vmcnt(0) +; GFX908-NEXT: v_accvgpr_write_b32 a1, v34 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX908-NEXT: s_waitcnt vmcnt(0) +; GFX908-NEXT: v_accvgpr_write_b32 a2, v34 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; GFX908-NEXT: s_waitcnt vmcnt(0) +; GFX908-NEXT: v_accvgpr_write_b32 a3, v34 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GFX908-NEXT: s_waitcnt vmcnt(0) +; GFX908-NEXT: v_accvgpr_write_b32 a4, v34 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; GFX908-NEXT: s_waitcnt vmcnt(0) +; GFX908-NEXT: v_accvgpr_write_b32 a5, v34 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; GFX908-NEXT: s_waitcnt vmcnt(0) +; GFX908-NEXT: v_accvgpr_write_b32 a6, v34 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; GFX908-NEXT: s_waitcnt vmcnt(0) +; GFX908-NEXT: v_accvgpr_write_b32 a7, v34 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; GFX908-NEXT: s_waitcnt vmcnt(0) +; GFX908-NEXT: v_accvgpr_write_b32 a8, v34 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; GFX908-NEXT: s_waitcnt vmcnt(0) +; GFX908-NEXT: v_accvgpr_write_b32 a9, v34 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; GFX908-NEXT: s_waitcnt vmcnt(0) +; GFX908-NEXT: v_accvgpr_write_b32 a10, v34 ; Reload Reuse +; GFX908-NEXT: v_accvgpr_write_b32 a11, v39 ; Reload Reuse +; GFX908-NEXT: v_accvgpr_write_b32 a12, v38 ; Reload Reuse +; GFX908-NEXT: v_accvgpr_write_b32 a13, v37 ; Reload Reuse +; GFX908-NEXT: v_accvgpr_write_b32 a14, v36 ; Reload Reuse +; GFX908-NEXT: v_accvgpr_write_b32 a15, v35 ; Reload Reuse +; GFX908-NEXT: ;;#ASMSTART +; GFX908-NEXT: ; copy +; GFX908-NEXT: ;;#ASMEND +; GFX908-NEXT: v_accvgpr_read_b32 v33, a2 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a3, v33 +; GFX908-NEXT: ;;#ASMSTART +; GFX908-NEXT: ; use a3 v[0:31] +; GFX908-NEXT: ;;#ASMEND +; GFX908-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: no_free_vgprs_at_agpr_copy: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v32, v0 +; GFX90A-NEXT: v_mov_b32_e32 v33, v1 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def v[0:31] a[0:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_accvgpr_mov_b32 a31, a15 +; GFX90A-NEXT: v_accvgpr_mov_b32 a30, a14 +; GFX90A-NEXT: v_accvgpr_mov_b32 a29, a13 +; GFX90A-NEXT: v_accvgpr_mov_b32 a28, a12 +; GFX90A-NEXT: v_accvgpr_mov_b32 a27, a11 +; GFX90A-NEXT: v_accvgpr_mov_b32 a26, a10 +; GFX90A-NEXT: v_accvgpr_mov_b32 a25, a9 +; GFX90A-NEXT: v_accvgpr_mov_b32 a24, a8 +; GFX90A-NEXT: v_accvgpr_mov_b32 a23, a7 +; GFX90A-NEXT: v_accvgpr_mov_b32 a22, a6 +; GFX90A-NEXT: v_accvgpr_mov_b32 a21, a5 +; GFX90A-NEXT: v_accvgpr_mov_b32 a20, a4 +; GFX90A-NEXT: v_accvgpr_mov_b32 a19, a3 +; GFX90A-NEXT: v_accvgpr_mov_b32 a18, a2 +; GFX90A-NEXT: v_accvgpr_mov_b32 a17, a1 +; GFX90A-NEXT: v_accvgpr_mov_b32 a16, a0 +; GFX90A-NEXT: s_nop 1 +; GFX90A-NEXT: v_mfma_f32_16x16x1f32 a[0:15], v32, v33, a[16:31] +; GFX90A-NEXT: s_nop 7 +; GFX90A-NEXT: s_nop 2 +; GFX90A-NEXT: buffer_store_dword a0, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: buffer_store_dword a1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX90A-NEXT: buffer_store_dword a2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX90A-NEXT: buffer_store_dword a3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX90A-NEXT: buffer_store_dword a4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX90A-NEXT: buffer_store_dword a5, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX90A-NEXT: buffer_store_dword a6, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX90A-NEXT: buffer_store_dword a7, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX90A-NEXT: buffer_store_dword a8, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX90A-NEXT: buffer_store_dword a9, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX90A-NEXT: v_accvgpr_read_b32 v39, a10 ; Reload Reuse +; GFX90A-NEXT: v_accvgpr_read_b32 v38, a11 ; Reload Reuse +; GFX90A-NEXT: v_accvgpr_read_b32 v37, a12 ; Reload Reuse +; GFX90A-NEXT: v_accvgpr_read_b32 v36, a13 ; Reload Reuse +; GFX90A-NEXT: v_accvgpr_read_b32 v35, a14 ; Reload Reuse +; GFX90A-NEXT: v_accvgpr_read_b32 v34, a15 ; Reload Reuse +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; copy +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_accvgpr_mov_b32 a16, a1 +; GFX90A-NEXT: buffer_load_dword a0, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX90A-NEXT: s_nop 0 +; GFX90A-NEXT: buffer_load_dword a1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX90A-NEXT: s_nop 0 +; GFX90A-NEXT: buffer_load_dword a2, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX90A-NEXT: s_nop 0 +; GFX90A-NEXT: buffer_load_dword a3, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; GFX90A-NEXT: s_nop 0 +; GFX90A-NEXT: buffer_load_dword a4, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GFX90A-NEXT: s_nop 0 +; GFX90A-NEXT: buffer_load_dword a5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; GFX90A-NEXT: s_nop 0 +; GFX90A-NEXT: buffer_load_dword a6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; GFX90A-NEXT: s_nop 0 +; GFX90A-NEXT: buffer_load_dword a7, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; GFX90A-NEXT: s_nop 0 +; GFX90A-NEXT: buffer_load_dword a8, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; GFX90A-NEXT: s_nop 0 +; GFX90A-NEXT: buffer_load_dword a9, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: v_accvgpr_write_b32 a10, v39 ; Reload Reuse +; GFX90A-NEXT: v_accvgpr_write_b32 a11, v38 ; Reload Reuse +; GFX90A-NEXT: v_accvgpr_write_b32 a12, v37 ; Reload Reuse +; GFX90A-NEXT: v_accvgpr_write_b32 a13, v36 ; Reload Reuse +; GFX90A-NEXT: v_accvgpr_write_b32 a14, v35 ; Reload Reuse +; GFX90A-NEXT: v_accvgpr_write_b32 a15, v34 ; Reload Reuse +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; copy +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_accvgpr_mov_b32 a3, a2 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use a3 v[0:31] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] + %asm = call { <32 x i32>, <16 x float> } asm sideeffect "; def $0 $1","=${v[0:31]},=${a[0:15]}"() + %vgpr0 = extractvalue { <32 x i32>, <16 x float> } %asm, 0 + %agpr0 = extractvalue { <32 x i32>, <16 x float> } %asm, 1 + %mfma = call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float %v0, float %v1, <16 x float> %agpr0, i32 0, i32 0, i32 0) + %agpr1 = call i32 asm sideeffect "; copy ", "={a1},a,~{a[0:15]}"(<16 x float> %agpr0) + %agpr2 = call i32 asm sideeffect "; copy ", "={a2},a,{a[0:15]}"(i32 %agpr1, <16 x float> %mfma) + call void asm sideeffect "; use $0 $1","{a3},{v[0:31]}"(i32 %agpr2, <32 x i32> %vgpr0) + ret void +} + +; Check that we do make use of v32 if there are no AGPRs present in the function +define amdgpu_kernel void @no_agpr_no_reserve(<32 x i32> addrspace(1)* %arg) #0 { +; GFX908-LABEL: no_agpr_no_reserve: +; GFX908: ; %bb.0: +; GFX908-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX908-NEXT: v_lshlrev_b32_e32 v32, 7, v0 +; GFX908-NEXT: s_waitcnt lgkmcnt(0) +; GFX908-NEXT: global_load_dwordx4 v[0:3], v32, s[0:1] offset:16 +; GFX908-NEXT: global_load_dwordx4 v[4:7], v32, s[0:1] +; GFX908-NEXT: global_load_dwordx4 v[8:11], v32, s[0:1] offset:48 +; GFX908-NEXT: global_load_dwordx4 v[12:15], v32, s[0:1] offset:32 +; GFX908-NEXT: global_load_dwordx4 v[16:19], v32, s[0:1] offset:80 +; GFX908-NEXT: global_load_dwordx4 v[20:23], v32, s[0:1] offset:64 +; GFX908-NEXT: global_load_dwordx4 v[24:27], v32, s[0:1] offset:112 +; GFX908-NEXT: global_load_dwordx4 v[28:31], v32, s[0:1] offset:96 +; GFX908-NEXT: s_waitcnt vmcnt(7) +; GFX908-NEXT: v_add_u32_e32 v3, v3, v3 +; GFX908-NEXT: v_add_u32_e32 v2, v2, v2 +; GFX908-NEXT: v_add_u32_e32 v1, v1, v1 +; GFX908-NEXT: v_add_u32_e32 v0, v0, v0 +; GFX908-NEXT: s_waitcnt vmcnt(6) +; GFX908-NEXT: v_add_u32_e32 v7, v7, v7 +; GFX908-NEXT: v_add_u32_e32 v6, v6, v6 +; GFX908-NEXT: v_add_u32_e32 v5, v5, v5 +; GFX908-NEXT: s_waitcnt vmcnt(0) +; GFX908-NEXT: v_add_u32_e32 v31, v31, v31 +; GFX908-NEXT: v_add_u32_e32 v30, v30, v30 +; GFX908-NEXT: v_add_u32_e32 v29, v29, v29 +; GFX908-NEXT: v_add_u32_e32 v28, v28, v28 +; GFX908-NEXT: v_add_u32_e32 v4, v4, v4 +; GFX908-NEXT: v_add_u32_e32 v11, v11, v11 +; GFX908-NEXT: v_add_u32_e32 v10, v10, v10 +; GFX908-NEXT: v_add_u32_e32 v9, v9, v9 +; GFX908-NEXT: v_add_u32_e32 v8, v8, v8 +; GFX908-NEXT: v_add_u32_e32 v15, v15, v15 +; GFX908-NEXT: v_add_u32_e32 v14, v14, v14 +; GFX908-NEXT: v_add_u32_e32 v13, v13, v13 +; GFX908-NEXT: v_add_u32_e32 v12, v12, v12 +; GFX908-NEXT: v_add_u32_e32 v19, v19, v19 +; GFX908-NEXT: v_add_u32_e32 v18, v18, v18 +; GFX908-NEXT: v_add_u32_e32 v17, v17, v17 +; GFX908-NEXT: v_add_u32_e32 v16, v16, v16 +; GFX908-NEXT: v_add_u32_e32 v23, v23, v23 +; GFX908-NEXT: v_add_u32_e32 v22, v22, v22 +; GFX908-NEXT: v_add_u32_e32 v21, v21, v21 +; GFX908-NEXT: v_add_u32_e32 v20, v20, v20 +; GFX908-NEXT: v_add_u32_e32 v27, v27, v27 +; GFX908-NEXT: v_add_u32_e32 v26, v26, v26 +; GFX908-NEXT: v_add_u32_e32 v25, v25, v25 +; GFX908-NEXT: v_add_u32_e32 v24, v24, v24 +; GFX908-NEXT: global_store_dwordx4 v32, v[28:31], s[0:1] offset:96 +; GFX908-NEXT: global_store_dwordx4 v32, v[24:27], s[0:1] offset:112 +; GFX908-NEXT: global_store_dwordx4 v32, v[20:23], s[0:1] offset:64 +; GFX908-NEXT: global_store_dwordx4 v32, v[16:19], s[0:1] offset:80 +; GFX908-NEXT: global_store_dwordx4 v32, v[12:15], s[0:1] offset:32 +; GFX908-NEXT: global_store_dwordx4 v32, v[8:11], s[0:1] offset:48 +; GFX908-NEXT: global_store_dwordx4 v32, v[4:7], s[0:1] +; GFX908-NEXT: global_store_dwordx4 v32, v[0:3], s[0:1] offset:16 +; GFX908-NEXT: s_endpgm +; +; GFX90A-LABEL: no_agpr_no_reserve: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX90A-NEXT: v_lshlrev_b32_e32 v32, 7, v0 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: global_load_dwordx4 v[0:3], v32, s[0:1] offset:16 +; GFX90A-NEXT: global_load_dwordx4 v[4:7], v32, s[0:1] +; GFX90A-NEXT: global_load_dwordx4 v[8:11], v32, s[0:1] offset:48 +; GFX90A-NEXT: global_load_dwordx4 v[12:15], v32, s[0:1] offset:32 +; GFX90A-NEXT: global_load_dwordx4 v[16:19], v32, s[0:1] offset:80 +; GFX90A-NEXT: global_load_dwordx4 v[20:23], v32, s[0:1] offset:64 +; GFX90A-NEXT: global_load_dwordx4 v[24:27], v32, s[0:1] offset:112 +; GFX90A-NEXT: global_load_dwordx4 v[28:31], v32, s[0:1] offset:96 +; GFX90A-NEXT: s_waitcnt vmcnt(7) +; GFX90A-NEXT: v_add_u32_e32 v3, v3, v3 +; GFX90A-NEXT: v_add_u32_e32 v2, v2, v2 +; GFX90A-NEXT: v_add_u32_e32 v1, v1, v1 +; GFX90A-NEXT: v_add_u32_e32 v0, v0, v0 +; GFX90A-NEXT: s_waitcnt vmcnt(6) +; GFX90A-NEXT: v_add_u32_e32 v7, v7, v7 +; GFX90A-NEXT: v_add_u32_e32 v6, v6, v6 +; GFX90A-NEXT: v_add_u32_e32 v5, v5, v5 +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: v_add_u32_e32 v31, v31, v31 +; GFX90A-NEXT: v_add_u32_e32 v30, v30, v30 +; GFX90A-NEXT: v_add_u32_e32 v29, v29, v29 +; GFX90A-NEXT: v_add_u32_e32 v28, v28, v28 +; GFX90A-NEXT: v_add_u32_e32 v4, v4, v4 +; GFX90A-NEXT: v_add_u32_e32 v11, v11, v11 +; GFX90A-NEXT: v_add_u32_e32 v10, v10, v10 +; GFX90A-NEXT: v_add_u32_e32 v9, v9, v9 +; GFX90A-NEXT: v_add_u32_e32 v8, v8, v8 +; GFX90A-NEXT: v_add_u32_e32 v15, v15, v15 +; GFX90A-NEXT: v_add_u32_e32 v14, v14, v14 +; GFX90A-NEXT: v_add_u32_e32 v13, v13, v13 +; GFX90A-NEXT: v_add_u32_e32 v12, v12, v12 +; GFX90A-NEXT: v_add_u32_e32 v19, v19, v19 +; GFX90A-NEXT: v_add_u32_e32 v18, v18, v18 +; GFX90A-NEXT: v_add_u32_e32 v17, v17, v17 +; GFX90A-NEXT: v_add_u32_e32 v16, v16, v16 +; GFX90A-NEXT: v_add_u32_e32 v23, v23, v23 +; GFX90A-NEXT: v_add_u32_e32 v22, v22, v22 +; GFX90A-NEXT: v_add_u32_e32 v21, v21, v21 +; GFX90A-NEXT: v_add_u32_e32 v20, v20, v20 +; GFX90A-NEXT: v_add_u32_e32 v27, v27, v27 +; GFX90A-NEXT: v_add_u32_e32 v26, v26, v26 +; GFX90A-NEXT: v_add_u32_e32 v25, v25, v25 +; GFX90A-NEXT: v_add_u32_e32 v24, v24, v24 +; GFX90A-NEXT: global_store_dwordx4 v32, v[28:31], s[0:1] offset:96 +; GFX90A-NEXT: global_store_dwordx4 v32, v[24:27], s[0:1] offset:112 +; GFX90A-NEXT: global_store_dwordx4 v32, v[20:23], s[0:1] offset:64 +; GFX90A-NEXT: global_store_dwordx4 v32, v[16:19], s[0:1] offset:80 +; GFX90A-NEXT: global_store_dwordx4 v32, v[12:15], s[0:1] offset:32 +; GFX90A-NEXT: global_store_dwordx4 v32, v[8:11], s[0:1] offset:48 +; GFX90A-NEXT: global_store_dwordx4 v32, v[4:7], s[0:1] +; GFX90A-NEXT: global_store_dwordx4 v32, v[0:3], s[0:1] offset:16 +; GFX90A-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr inbounds <32 x i32>, <32 x i32> addrspace(1)* %arg, i32 %id + %load = load <32 x i32>, <32 x i32> addrspace(1)* %gep + %add = add <32 x i32> %load, %load + store <32 x i32> %add, <32 x i32> addrspace(1)* %gep + ret void +} + +; FIXME: This case is broken. The asm value passed in v32 is live +; through the range where the reserved def for the copy is introduced, +; clobbering the user value. +define void @v32_asm_def_use(float %v0, float %v1) #0 { +; GFX908-LABEL: v32_asm_def_use: +; GFX908: ; %bb.0: +; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX908-NEXT: v_mov_b32_e32 v33, v1 +; GFX908-NEXT: v_mov_b32_e32 v34, v0 +; GFX908-NEXT: ;;#ASMSTART +; GFX908-NEXT: ; def v[0:31] a[0:15] +; GFX908-NEXT: ;;#ASMEND +; GFX908-NEXT: v_accvgpr_read_b32 v32, a15 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a31, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a14 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a30, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a13 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a29, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a12 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a28, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a11 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a27, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a10 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a26, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a9 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a25, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a8 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a24, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a7 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a23, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a6 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a22, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a5 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a21, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a4 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a20, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a3 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a19, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a2 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a18, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a1 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a17, v32 +; GFX908-NEXT: v_accvgpr_read_b32 v32, a0 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a16, v32 +; GFX908-NEXT: ;;#ASMSTART +; GFX908-NEXT: ; def v32 +; GFX908-NEXT: ;;#ASMEND +; GFX908-NEXT: ;;#ASMSTART +; GFX908-NEXT: ; copy +; GFX908-NEXT: ;;#ASMEND +; GFX908-NEXT: v_accvgpr_read_b32 v32, a1 +; GFX908-NEXT: v_mfma_f32_16x16x1f32 a[0:15], v34, v33, a[16:31] +; GFX908-NEXT: s_nop 0 +; GFX908-NEXT: v_accvgpr_write_b32 a32, v32 +; GFX908-NEXT: ;;#ASMSTART +; GFX908-NEXT: ; copy +; GFX908-NEXT: ;;#ASMEND +; GFX908-NEXT: s_nop 7 +; GFX908-NEXT: v_accvgpr_read_b32 v33, a2 +; GFX908-NEXT: s_nop 1 +; GFX908-NEXT: v_accvgpr_write_b32 a3, v33 +; GFX908-NEXT: ;;#ASMSTART +; GFX908-NEXT: ; use a3 v[0:31] +; GFX908-NEXT: ;;#ASMEND +; GFX908-NEXT: ;;#ASMSTART +; GFX908-NEXT: ; use v32 +; GFX908-NEXT: ;;#ASMEND +; GFX908-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: v32_asm_def_use: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v34, v0 +; GFX90A-NEXT: v_mov_b32_e32 v33, v1 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def v[0:31] a[0:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_accvgpr_mov_b32 a31, a15 +; GFX90A-NEXT: v_accvgpr_mov_b32 a30, a14 +; GFX90A-NEXT: v_accvgpr_mov_b32 a29, a13 +; GFX90A-NEXT: v_accvgpr_mov_b32 a28, a12 +; GFX90A-NEXT: v_accvgpr_mov_b32 a27, a11 +; GFX90A-NEXT: v_accvgpr_mov_b32 a26, a10 +; GFX90A-NEXT: v_accvgpr_mov_b32 a25, a9 +; GFX90A-NEXT: v_accvgpr_mov_b32 a24, a8 +; GFX90A-NEXT: v_accvgpr_mov_b32 a23, a7 +; GFX90A-NEXT: v_accvgpr_mov_b32 a22, a6 +; GFX90A-NEXT: v_accvgpr_mov_b32 a21, a5 +; GFX90A-NEXT: v_accvgpr_mov_b32 a20, a4 +; GFX90A-NEXT: v_accvgpr_mov_b32 a19, a3 +; GFX90A-NEXT: v_accvgpr_mov_b32 a18, a2 +; GFX90A-NEXT: v_accvgpr_mov_b32 a17, a1 +; GFX90A-NEXT: v_accvgpr_mov_b32 a16, a0 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def v32 +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; copy +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_accvgpr_read_b32 v35, a32 ; Reload Reuse +; GFX90A-NEXT: v_accvgpr_mov_b32 a32, a1 +; GFX90A-NEXT: v_mfma_f32_16x16x1f32 a[0:15], v34, v33, a[16:31] +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; copy +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_accvgpr_write_b32 a32, v35 ; Reload Reuse +; GFX90A-NEXT: s_nop 7 +; GFX90A-NEXT: s_nop 1 +; GFX90A-NEXT: v_accvgpr_mov_b32 a3, a2 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use a3 v[0:31] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use v32 +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] + %asm = call { <32 x i32>, <16 x float> } asm sideeffect "; def $0 $1","=${v[0:31]},=${a[0:15]}"() + %vgpr0 = extractvalue { <32 x i32>, <16 x float> } %asm, 0 + %agpr0 = extractvalue { <32 x i32>, <16 x float> } %asm, 1 + %mfma = call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float %v0, float %v1, <16 x float> %agpr0, i32 0, i32 0, i32 0) + %v32 = call i32 asm sideeffect "; def $0","=${v32}"() + %agpr1 = call i32 asm sideeffect "; copy ", "={a1},a,~{a[0:15]}"(<16 x float> %agpr0) + %agpr2 = call i32 asm sideeffect "; copy ", "={a2},a,{a[0:15]}"(i32 %agpr1, <16 x float> %mfma) + call void asm sideeffect "; use $0 $1","{a3},{v[0:31]}"(i32 %agpr2, <32 x i32> %vgpr0) + call void asm sideeffect "; use $0","${v32}"(i32 %v32) + ret void +} + +declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32 immarg, i32 immarg, i32 immarg) #1 +declare i32 @llvm.amdgcn.workitem.id.x() #2 + +attributes #0 = { "amdgpu-waves-per-eu"="6,6" } +attributes #1 = { convergent nounwind readnone willreturn } +attributes #2 = { nounwind readnone willreturn } Index: llvm/test/CodeGen/AMDGPU/agpr-copy-no-vgprs.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/agpr-copy-no-vgprs.mir +++ llvm/test/CodeGen/AMDGPU/agpr-copy-no-vgprs.mir @@ -1,9 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# XUN: not --crash llc -march=amdgcn -mcpu=gfx908 -run-pass=postrapseudos -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck -check-prefix=GFX908 %s +# RUN: llc -march=amdgcn -mcpu=gfx908 -run-pass=postrapseudos -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX908 %s # RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass=postrapseudos -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX90A %s -# GFX908: LLVM ERROR: Error while trying to spill VGPR0 from class VGPR_32: Cannot scavenge register without an emergency spill slot! - --- name: no_free_vgprs_for_copy_a32_to_a32 tracksRegLiveness: true @@ -11,6 +9,12 @@ bb.0: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $agpr0 + ; GFX908-LABEL: name: no_free_vgprs_for_copy_a32_to_a32 + ; GFX908: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $agpr0 + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec + ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec + ; GFX908-NEXT: S_ENDPGM 0, csr_amdgpu_allvgprs, implicit $agpr1 ; GFX90A-LABEL: name: no_free_vgprs_for_copy_a32_to_a32 ; GFX90A: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $agpr0 ; GFX90A-NEXT: {{ $}} @@ -26,6 +30,14 @@ bb.0: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $agpr0_agpr1 + ; GFX908-LABEL: name: no_free_vgprs_for_copy_a64_to_a64 + ; GFX908: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $agpr0_agpr1 + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1 + ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec, implicit-def $agpr2_agpr3 + ; GFX908-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1 + ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec + ; GFX908-NEXT: S_ENDPGM 0, csr_amdgpu_allvgprs, implicit $agpr2_agpr3 ; GFX90A-LABEL: name: no_free_vgprs_for_copy_a64_to_a64 ; GFX90A: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $agpr0_agpr1 ; GFX90A-NEXT: {{ $}} Index: llvm/test/CodeGen/AMDGPU/agpr-copy-sgpr-no-vgprs.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/agpr-copy-sgpr-no-vgprs.mir +++ llvm/test/CodeGen/AMDGPU/agpr-copy-sgpr-no-vgprs.mir @@ -1,4 +1,5 @@ -# RUN: not --crash llc -march=amdgcn -mcpu=gfx908 -run-pass=postrapseudos -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx908 -run-pass=postrapseudos -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX908 %s # RUN: not --crash llc -march=amdgcn -mcpu=gfx90a -run-pass=postrapseudos -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s # CHECK: LLVM ERROR: Error while trying to spill VGPR0 from class VGPR_32: Cannot scavenge register without an emergency spill slot! @@ -10,11 +11,12 @@ bb.0: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 - ; GFX90A-LABEL: name: no_free_vgprs_for_copy_a32_to_a32 - ; GFX90A: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 - ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec - ; GFX90A-NEXT: S_ENDPGM 0, csr_amdgpu_allvgprs, implicit $agpr1 + ; GFX908-LABEL: name: no_free_vgprs_for_copy_s32_to_a32 + ; GFX908: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: $vgpr32 = V_MOV_B32_e32 $sgpr8, implicit $exec + ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec + ; GFX908-NEXT: S_ENDPGM 0, csr_amdgpu_allvgprs, implicit $agpr1 $agpr1 = COPY $sgpr8 S_ENDPGM 0, csr_amdgpu_allvgprs, implicit $agpr1 ... @@ -26,12 +28,14 @@ bb.0: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8_sgpr9 - ; GFX90A-LABEL: name: no_free_vgprs_for_copy_a64_to_a64 - ; GFX90A: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8_sgpr9 - ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit-def $agpr2_agpr3, implicit $agpr0_agpr1 - ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1 - ; GFX90A-NEXT: S_ENDPGM 0, csr_amdgpu_allvgprs, implicit $agpr2_agpr3 + ; GFX908-LABEL: name: no_free_vgprs_for_copy_s64_to_a64 + ; GFX908: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8_sgpr9 + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: $vgpr32 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit $sgpr8_sgpr9 + ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec, implicit-def $agpr2_agpr3 + ; GFX908-NEXT: $vgpr32 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit $sgpr8_sgpr9 + ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec + ; GFX908-NEXT: S_ENDPGM 0, csr_amdgpu_allvgprs, implicit $agpr2_agpr3 $agpr2_agpr3 = COPY $sgpr8_sgpr9 S_ENDPGM 0, csr_amdgpu_allvgprs, implicit $agpr2_agpr3 ... Index: llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir +++ llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir @@ -121,7 +121,7 @@ --- # GCN-LABEL: name: alloc_vgpr_1024 -# GFX908: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 = IMPLICIT_DEF +# GFX908: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF name: alloc_vgpr_1024 tracksRegLiveness: true liveins: Index: llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll +++ llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll @@ -420,9 +420,9 @@ ; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x: ; GCN-DAG: s_addk_i32 s32, 0x400{{$}} ; GCN-DAG: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-DAG: buffer_load_dword v32, off, s[0:3], s33{{$}} +; GCN-DAG: buffer_load_dword [[TMP_REG:v[0-9]+]], off, s[0:3], s33{{$}} -; GCN: buffer_store_dword v32, off, s[0:3], s32{{$}} +; GCN: buffer_store_dword [[TMP_REG]], off, s[0:3], s32{{$}} ; GCN: s_swappc_b64