diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -119,7 +119,7 @@ // special tokens like SCMEM_LDS (needed for buffer load to LDS). enum RegisterMapping { SQ_MAX_PGM_VGPRS = 512, // Maximum programmable VGPRs across all targets. - AGPR_OFFSET = 226, // Maximum programmable ArchVGPRs across all targets. + AGPR_OFFSET = 256, // Maximum programmable ArchVGPRs across all targets. SQ_MAX_PGM_SGPRS = 256, // Maximum programmable SGPRs across all targets. NUM_EXTRA_VGPRS = 1, // A reserved slot for DS. EXTRA_VGPR_LDS = 0, // This is a placeholder the Shader algorithm uses. diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-agpr.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-agpr.mir --- a/llvm/test/CodeGen/AMDGPU/waitcnt-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-agpr.mir @@ -335,7 +335,6 @@ --- # agpr should be disjoint and tracked separately from vgpr -# vgpr226 and agpr0 erroneously share waitcnt storage index, so a waitcnt is inserted before store of agpr0 when it is not needed name: high_register_collision @@ -347,7 +346,6 @@ ; GCN-NEXT: $vgpr226 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec ; GCN-NEXT: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr1, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_WAITCNT 112 ; GCN-NEXT: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: S_ENDPGM 0 $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec