diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -637,8 +637,9 @@ // Only loop through if there are any free registers left. We don't want to // spill. while (RegNo--) { - Register Tmp2 = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0, - /* AllowSpill */ false); + Register Tmp2 = RS.scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, + /* RestoreAfter */ false, 0, + /* AllowSpill */ false); if (!Tmp2 || RI.getHWRegIndex(Tmp2) >= MaxVGPRs) break; Tmp = Tmp2; @@ -7919,10 +7920,11 @@ return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e32), DestReg); // If available, prefer to use vcc. - Register UnusedCarry = - !RS.isRegUsed(AMDGPU::VCC) - ? Register(RI.getVCC()) - : RS.scavengeRegister(RI.getBoolRC(), I, 0, /* AllowSpill */ false); + Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC) + ? Register(RI.getVCC()) + : RS.scavengeRegisterBackwards( + *RI.getBoolRC(), I, /* RestoreAfter */ false, + 0, /* AllowSpill */ false); // TODO: Users need to deal with this. if (!UnusedCarry.isValid()) diff --git a/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir b/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir --- a/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir @@ -977,8 +977,8 @@ ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr255, implicit $exec, implicit-def $agpr0_agpr1_agpr2 ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr1_agpr2_agpr3 ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX908-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr1_agpr2_agpr3 - ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2, implicit $vgpr1 ; GFX90A-LABEL: name: a3_to_a3_overlap_kill diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.bf16.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.bf16.ll @@ -65,8 +65,8 @@ ; GCN-LABEL: {{^}}test_mfma_f32_16x16x2bf16: ; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 -; GCN: s_load_dwordx16 -; GFX908-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: s_load_dwordx16 +; GFX908-DAG-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} ; GFX90A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GCN: v_mfma_f32_16x16x2bf16 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-16: v_accvgpr_read_b32 @@ -107,8 +107,8 @@ ; GCN-LABEL: {{^}}test_mfma_f32_32x32x4bf16: ; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 -; GCN: s_load_dwordx16 -; GFX908-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: s_load_dwordx16 +; GFX908-DAG-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} ; GFX90A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GCN: v_mfma_f32_32x32x4bf16 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-16: v_accvgpr_read_b32 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.i8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.i8.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.i8.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.i8.ll @@ -8,8 +8,8 @@ ; GCN-LABEL: {{^}}test_mfma_i32_32x32x8i8: ; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 -; GCN: s_load_dwordx16 -; GFX908-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: s_load_dwordx16 +; GFX908-DAG-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} ; GFX90A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GCN: v_mfma_i32_32x32x8i8 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-16: v_accvgpr_read_b32 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll @@ -79,8 +79,8 @@ ; GCN-LABEL: {{^}}test_mfma_f32_16x16x1f32: ; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0 -; GCN: s_load_dwordx16 -; GFX908-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: s_load_dwordx16 +; GFX908-DAG-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} ; GFX90A_40-COUNT-16:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX908_A: v_mfma_f32_16x16x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX940: v_mfma_f32_16x16x1_4b_f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 @@ -119,8 +119,8 @@ ; GCN-LABEL: {{^}}test_mfma_f32_32x32x2f32: ; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0 -; GCN: s_load_dwordx16 -; GFX908-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: s_load_dwordx16 +; GFX908-DAG-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} ; GFX90A_40-COUNT-16:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX908_A: v_mfma_f32_32x32x2f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX940: v_mfma_f32_32x32x2_f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 @@ -321,8 +321,8 @@ ; GCN-LABEL: {{^}}test_mfma_i32_16x16x4i8: ; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 -; GCN: s_load_dwordx16 -; GFX908-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: s_load_dwordx16 +; GFX908-DAG-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} ; GFX90A_40-COUNT-16:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX908_A: v_mfma_i32_16x16x4i8 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX940: v_mfma_i32_16x16x4_4b_i8 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3