diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1036,6 +1036,7 @@ else if (MI.getOpcode() == AMDGPU::S_ENDPGM || MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED) { if (ST->getGeneration() >= AMDGPUSubtarget::GFX11 && + MI.getMF()->getTarget().getOptLevel() != CodeGenOpt::None && ScoreBrackets.getScoreRange(VS_CNT) != 0 && !ScoreBrackets.hasPendingEvent(SCRATCH_WRITE_ACCESS)) ReleaseVGPRInsts.insert(&MI); diff --git a/llvm/test/CodeGen/AMDGPU/release-vgprs.mir b/llvm/test/CodeGen/AMDGPU/release-vgprs.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/release-vgprs.mir @@ -0,0 +1,543 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O2 -march=amdgcn -mcpu=gfx1100 -run-pass=si-insert-waitcnts -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=CHECK,OPT +# RUN: llc -O0 -march=amdgcn -mcpu=gfx1100 -run-pass=si-insert-waitcnts -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=CHECK,NOOPT + +--- | + define amdgpu_ps void @tbuffer_store1() { ret void } + define amdgpu_ps void @tbuffer_store2() { ret void } + define amdgpu_ps void @flat_store() { ret void } + define amdgpu_ps void @global_store() { ret void } + define amdgpu_ps void @buffer_store_format() { ret void } + define amdgpu_ps void @ds_write_b32() { ret void } + define amdgpu_ps void @global_store_dword() { ret void } + define amdgpu_ps void @multiple_basic_blocks1() { ret void } + define amdgpu_ps void @multiple_basic_blocks2() { ret void } + define amdgpu_ps void @multiple_basic_blocks3() { ret void } + define amdgpu_ps void @recursive_loop() { ret void } + define amdgpu_ps void @recursive_loop_vmem() { ret void } + define amdgpu_ps void @image_store() { ret void } + define amdgpu_ps void @scratch_store() { ret void } + define amdgpu_ps void @buffer_atomic() { ret void } + define 
amdgpu_ps void @flat_atomic() { ret void } + define amdgpu_ps void @global_atomic() { ret void } + define amdgpu_ps void @image_atomic() { ret void } +... + +--- +name: tbuffer_store1 +body: | + bb.0: + ; OPT-LABEL: name: tbuffer_store1 + ; OPT: S_WAITCNT 0 + ; OPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec + ; OPT-NEXT: S_NOP 0 + ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 + ; OPT-NEXT: S_ENDPGM 0 + ; + ; NOOPT-LABEL: name: tbuffer_store1 + ; NOOPT: S_WAITCNT 0 + ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec + ; NOOPT-NEXT: S_ENDPGM 0 + TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec + S_ENDPGM 0 +... 
+ +--- +name: tbuffer_store2 +body: | + bb.0: + ; OPT-LABEL: name: tbuffer_store2 + ; OPT: S_WAITCNT 0 + ; OPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + ; OPT-NEXT: S_NOP 0 + ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 + ; OPT-NEXT: S_ENDPGM 0 + ; + ; NOOPT-LABEL: name: tbuffer_store2 + ; NOOPT: S_WAITCNT 0 + ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + ; NOOPT-NEXT: S_ENDPGM 0 + TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + S_ENDPGM 0 +... + +--- +name: flat_store +body: | + bb.0: + ; CHECK-LABEL: name: flat_store + ; CHECK: S_WAITCNT 0 + ; CHECK-NEXT: FLAT_STORE_DWORDX4 $vgpr49_vgpr50, $vgpr26_vgpr27_vgpr28_vgpr29, 0, 0, implicit $exec, implicit $flat_scr + ; CHECK-NEXT: S_ENDPGM 0 + FLAT_STORE_DWORDX4 $vgpr49_vgpr50, $vgpr26_vgpr27_vgpr28_vgpr29, 0, 0, implicit $exec, implicit $flat_scr + S_ENDPGM 0 +... 
+ +--- +name: global_store +body: | + bb.0: + ; OPT-LABEL: name: global_store + ; OPT: S_WAITCNT 0 + ; OPT-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec + ; OPT-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 + ; OPT-NEXT: S_NOP 0 + ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 + ; OPT-NEXT: S_ENDPGM 0 + ; + ; NOOPT-LABEL: name: global_store + ; NOOPT: S_WAITCNT 0 + ; NOOPT-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec + ; NOOPT-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 + ; NOOPT-NEXT: S_ENDPGM 0 + GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec + S_WAITCNT_VSCNT undef $sgpr_null, 0 + S_ENDPGM 0 +... + +--- +name: buffer_store_format +body: | + bb.0: + ; OPT-LABEL: name: buffer_store_format + ; OPT: S_WAITCNT 0 + ; OPT-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec + ; OPT-NEXT: S_NOP 0 + ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 + ; OPT-NEXT: S_ENDPGM 0 + ; + ; NOOPT-LABEL: name: buffer_store_format + ; NOOPT: S_WAITCNT 0 + ; NOOPT-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec + ; NOOPT-NEXT: S_ENDPGM 0 + BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec + S_ENDPGM 0 +... 
+ +--- +name: ds_write_b32 +body: | + bb.0: + ; CHECK-LABEL: name: ds_write_b32 + ; CHECK: renamable $vgpr0 = IMPLICIT_DEF + ; CHECK-NEXT: renamable $vgpr1 = IMPLICIT_DEF + ; CHECK-NEXT: S_WAITCNT 0 + ; CHECK-NEXT: DS_WRITE_B32_gfx9 killed renamable $vgpr0, killed renamable $vgpr1, 12, 0, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0 + renamable $vgpr0 = IMPLICIT_DEF + renamable $vgpr1 = IMPLICIT_DEF + DS_WRITE_B32_gfx9 killed renamable $vgpr0, killed renamable $vgpr1, 12, 0, implicit $exec + S_ENDPGM 0 + +... +--- +name: global_store_dword +body: | + bb.0: + liveins: $vgpr0, $sgpr0_sgpr1 + + ; OPT-LABEL: name: global_store_dword + ; OPT: liveins: $vgpr0, $sgpr0_sgpr1 + ; OPT-NEXT: {{ $}} + ; OPT-NEXT: S_WAITCNT 0 + ; OPT-NEXT: renamable $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr1, killed $vgpr0, killed $sgpr2, 0, implicit $exec + ; OPT-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr2, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1, 0, 0, implicit $exec + ; OPT-NEXT: S_NOP 0 + ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 + ; OPT-NEXT: S_ENDPGM 0 + ; + ; NOOPT-LABEL: name: global_store_dword + ; NOOPT: liveins: $vgpr0, $sgpr0_sgpr1 + ; NOOPT-NEXT: {{ $}} + ; NOOPT-NEXT: S_WAITCNT 0 + ; NOOPT-NEXT: renamable $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr1, killed $vgpr0, killed $sgpr2, 0, implicit $exec + ; NOOPT-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr2, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1, 0, 0, implicit $exec + ; NOOPT-NEXT: S_ENDPGM 0 + renamable $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr1, killed $vgpr0, killed $sgpr2, 0, implicit $exec + GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr2, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1, 0, 0, implicit $exec + S_ENDPGM 0 +... 
+ +--- +name: multiple_basic_blocks1 +body: | + ; CHECK-LABEL: name: multiple_basic_blocks1 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_WAITCNT 0 + ; CHECK-NEXT: renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_WAITCNT 1015 + ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec + ; CHECK-NEXT: S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit killed $scc + ; CHECK-NEXT: S_BRANCH %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + successors: %bb.1 + + renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec + S_BRANCH %bb.1 + + bb.1: + successors: %bb.1, %bb.2 + + $vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec + S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc + S_CBRANCH_SCC1 %bb.1, implicit killed $scc + S_BRANCH %bb.2 + + bb.2: + S_ENDPGM 0 + +... + + +# One block has a VMEM store as the last instruction, so we should release the VGPRs +... 
+--- +name: multiple_basic_blocks2 +body: | + ; OPT-LABEL: name: multiple_basic_blocks2 + ; OPT: bb.0: + ; OPT-NEXT: successors: %bb.2(0x80000000) + ; OPT-NEXT: {{ $}} + ; OPT-NEXT: S_WAITCNT 0 + ; OPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec + ; OPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec + ; OPT-NEXT: S_BRANCH %bb.2 + ; OPT-NEXT: {{ $}} + ; OPT-NEXT: bb.1: + ; OPT-NEXT: successors: %bb.2(0x80000000) + ; OPT-NEXT: {{ $}} + ; OPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec + ; OPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec + ; OPT-NEXT: S_BRANCH %bb.2 + ; OPT-NEXT: {{ $}} + ; OPT-NEXT: bb.2: + ; OPT-NEXT: S_NOP 0 + ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 + ; OPT-NEXT: S_ENDPGM 0 + ; + ; NOOPT-LABEL: name: multiple_basic_blocks2 + ; NOOPT: bb.0: + ; NOOPT-NEXT: successors: %bb.2(0x80000000) + ; NOOPT-NEXT: {{ $}} + ; NOOPT-NEXT: S_WAITCNT 0 + ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec + ; NOOPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec + ; NOOPT-NEXT: S_BRANCH %bb.2 + ; NOOPT-NEXT: {{ $}} + ; NOOPT-NEXT: bb.1: + ; NOOPT-NEXT: successors: %bb.2(0x80000000) + ; NOOPT-NEXT: {{ $}} + ; NOOPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec + ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec + ; NOOPT-NEXT: S_BRANCH %bb.2 + ; NOOPT-NEXT: {{ $}} + ; NOOPT-NEXT: bb.2: + ; NOOPT-NEXT: S_ENDPGM 0 + bb.0: + successors: %bb.2 + + TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec + $vgpr1 = V_ADD_U32_e32 renamable 
$vgpr0, renamable $vgpr2, implicit $exec + S_BRANCH %bb.2 + + bb.1: + successors: %bb.2 + + $vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec + TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec + S_BRANCH %bb.2 + + bb.2: + S_ENDPGM 0 +... + + +# One parent block has a VMEM store, release VGPRs +--- +name: multiple_basic_blocks3 +body: | + ; OPT-LABEL: name: multiple_basic_blocks3 + ; OPT: bb.0: + ; OPT-NEXT: successors: %bb.2(0x80000000) + ; OPT-NEXT: {{ $}} + ; OPT-NEXT: S_WAITCNT 0 + ; OPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec + ; OPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec + ; OPT-NEXT: S_BRANCH %bb.2 + ; OPT-NEXT: {{ $}} + ; OPT-NEXT: bb.1: + ; OPT-NEXT: successors: %bb.2(0x80000000) + ; OPT-NEXT: {{ $}} + ; OPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec + ; OPT-NEXT: S_BRANCH %bb.2 + ; OPT-NEXT: {{ $}} + ; OPT-NEXT: bb.2: + ; OPT-NEXT: successors: %bb.4(0x80000000) + ; OPT-NEXT: {{ $}} + ; OPT-NEXT: S_BRANCH %bb.4 + ; OPT-NEXT: {{ $}} + ; OPT-NEXT: bb.3: + ; OPT-NEXT: successors: %bb.4(0x80000000) + ; OPT-NEXT: {{ $}} + ; OPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec + ; OPT-NEXT: S_BRANCH %bb.4 + ; OPT-NEXT: {{ $}} + ; OPT-NEXT: bb.4: + ; OPT-NEXT: S_NOP 0 + ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 + ; OPT-NEXT: S_ENDPGM 0 + ; + ; NOOPT-LABEL: name: multiple_basic_blocks3 + ; NOOPT: bb.0: + ; NOOPT-NEXT: successors: %bb.2(0x80000000) + ; NOOPT-NEXT: {{ $}} + ; NOOPT-NEXT: S_WAITCNT 0 + ; NOOPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec + ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec + ; NOOPT-NEXT: S_BRANCH %bb.2 + ; NOOPT-NEXT: {{ $}} + ; NOOPT-NEXT: bb.1: + ; 
NOOPT-NEXT: successors: %bb.2(0x80000000) + ; NOOPT-NEXT: {{ $}} + ; NOOPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec + ; NOOPT-NEXT: S_BRANCH %bb.2 + ; NOOPT-NEXT: {{ $}} + ; NOOPT-NEXT: bb.2: + ; NOOPT-NEXT: successors: %bb.4(0x80000000) + ; NOOPT-NEXT: {{ $}} + ; NOOPT-NEXT: S_BRANCH %bb.4 + ; NOOPT-NEXT: {{ $}} + ; NOOPT-NEXT: bb.3: + ; NOOPT-NEXT: successors: %bb.4(0x80000000) + ; NOOPT-NEXT: {{ $}} + ; NOOPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec + ; NOOPT-NEXT: S_BRANCH %bb.4 + ; NOOPT-NEXT: {{ $}} + ; NOOPT-NEXT: bb.4: + ; NOOPT-NEXT: S_ENDPGM 0 + bb.0: + successors: %bb.2 + + $vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec + TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec + S_BRANCH %bb.2 + + bb.1: + successors: %bb.2 + + $vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec + S_BRANCH %bb.2 + + bb.2: + successors: %bb.4 + + S_BRANCH %bb.4 + + bb.3: + successors: %bb.4 + + $vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec + S_BRANCH %bb.4 + + bb.4: + S_ENDPGM 0 +... 
+ +--- +name: recursive_loop +body: | + ; CHECK-LABEL: name: recursive_loop + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_WAITCNT 0 + ; CHECK-NEXT: renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit killed $scc + ; CHECK-NEXT: S_BRANCH %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + successors: %bb.1 + + renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec + S_BRANCH %bb.1 + + bb.1: + successors: %bb.1, %bb.2 + + S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc + S_CBRANCH_SCC1 %bb.1, implicit killed $scc + S_BRANCH %bb.2 + + bb.2: + S_ENDPGM 0 +... 
+ +--- +name: recursive_loop_vmem +body: | + ; OPT-LABEL: name: recursive_loop_vmem + ; OPT: bb.0: + ; OPT-NEXT: successors: %bb.1(0x80000000) + ; OPT-NEXT: {{ $}} + ; OPT-NEXT: S_WAITCNT 0 + ; OPT-NEXT: renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec + ; OPT-NEXT: S_BRANCH %bb.1 + ; OPT-NEXT: {{ $}} + ; OPT-NEXT: bb.1: + ; OPT-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; OPT-NEXT: {{ $}} + ; OPT-NEXT: S_WAITCNT 1015 + ; OPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec + ; OPT-NEXT: S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc + ; OPT-NEXT: S_CBRANCH_SCC1 %bb.1, implicit killed $scc + ; OPT-NEXT: S_BRANCH %bb.2 + ; OPT-NEXT: {{ $}} + ; OPT-NEXT: bb.2: + ; OPT-NEXT: S_NOP 0 + ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 + ; OPT-NEXT: S_ENDPGM 0 + ; + ; NOOPT-LABEL: name: recursive_loop_vmem + ; NOOPT: bb.0: + ; NOOPT-NEXT: successors: %bb.1(0x80000000) + ; NOOPT-NEXT: {{ $}} + ; NOOPT-NEXT: S_WAITCNT 0 + ; NOOPT-NEXT: renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec + ; NOOPT-NEXT: S_BRANCH %bb.1 + ; NOOPT-NEXT: {{ $}} + ; NOOPT-NEXT: bb.1: + ; NOOPT-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; NOOPT-NEXT: {{ $}} + ; NOOPT-NEXT: S_WAITCNT 1015 + ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec + ; NOOPT-NEXT: S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc + ; NOOPT-NEXT: S_CBRANCH_SCC1 %bb.1, implicit killed $scc + ; NOOPT-NEXT: S_BRANCH %bb.2 + ; NOOPT-NEXT: {{ $}} + ; NOOPT-NEXT: bb.2: + ; NOOPT-NEXT: S_ENDPGM 0 + bb.0: + 
successors: %bb.1 + + renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec + S_BRANCH %bb.1 + + bb.1: + successors: %bb.1, %bb.2 + + TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec + S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc + S_CBRANCH_SCC1 %bb.1, implicit killed $scc + S_BRANCH %bb.2 + + bb.2: + S_ENDPGM 0 +... + +--- +name: image_store +body: | + bb.0: + ; OPT-LABEL: name: image_store + ; OPT: S_WAITCNT 0 + ; OPT-NEXT: IMAGE_STORE_V2_V1_gfx11 killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 12, 0, 1, 0, 0, -1, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), addrspace 7) + ; OPT-NEXT: S_NOP 0 + ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 + ; OPT-NEXT: S_ENDPGM 0 + ; + ; NOOPT-LABEL: name: image_store + ; NOOPT: S_WAITCNT 0 + ; NOOPT-NEXT: IMAGE_STORE_V2_V1_gfx11 killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 12, 0, 1, 0, 0, -1, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), addrspace 7) + ; NOOPT-NEXT: S_ENDPGM 0 + IMAGE_STORE_V2_V1_gfx11 killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 12, 0, 1, 0, 0, -1, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), addrspace 7) + S_ENDPGM 0 +... 
+ +--- +name: scratch_store +body: | + bb.0: + ; CHECK-LABEL: name: scratch_store + ; CHECK: S_WAITCNT 0 + ; CHECK-NEXT: renamable $sgpr0 = S_AND_B32 killed renamable $sgpr0, -16, implicit-def dead $scc + ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $sgpr0, 0, 0, implicit $exec, implicit $flat_scr + ; CHECK-NEXT: S_ENDPGM 0 + renamable $sgpr0 = S_AND_B32 killed renamable $sgpr0, -16, implicit-def dead $scc + SCRATCH_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $sgpr0, 0, 0, implicit $exec, implicit $flat_scr + S_ENDPGM 0 +... + +--- +name: buffer_atomic +body: | + bb.0: + ; OPT-LABEL: name: buffer_atomic + ; OPT: S_WAITCNT 0 + ; OPT-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; OPT-NEXT: S_NOP 0 + ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 + ; OPT-NEXT: S_ENDPGM 0 + ; + ; NOOPT-LABEL: name: buffer_atomic + ; NOOPT: S_WAITCNT 0 + ; NOOPT-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; NOOPT-NEXT: S_ENDPGM 0 + BUFFER_ATOMIC_ADD_F32_OFFEN killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + S_ENDPGM 0 +... 
+ +--- +name: flat_atomic +body: | + bb.0: + ; CHECK-LABEL: name: flat_atomic + ; CHECK: S_WAITCNT 0 + ; CHECK-NEXT: renamable $vgpr0_vgpr1 = FLAT_ATOMIC_DEC_X2_RTN killed renamable $vgpr0_vgpr1, killed renamable $vgpr2_vgpr3, 40, 1, implicit $exec, implicit $flat_scr + ; CHECK-NEXT: S_ENDPGM 0 + renamable $vgpr0_vgpr1 = FLAT_ATOMIC_DEC_X2_RTN killed renamable $vgpr0_vgpr1, killed renamable $vgpr2_vgpr3, 40, 1, implicit $exec, implicit $flat_scr + S_ENDPGM 0 +... + + +--- +name: global_atomic +body: | + bb.0: + ; CHECK-LABEL: name: global_atomic + ; CHECK: S_WAITCNT 0 + ; CHECK-NEXT: renamable $vgpr0_vgpr1 = GLOBAL_ATOMIC_INC_X2_SADDR_RTN killed renamable $vgpr0, killed renamable $vgpr1_vgpr2, killed renamable $sgpr0_sgpr1, 40, 1, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0 + renamable $vgpr0_vgpr1 = GLOBAL_ATOMIC_INC_X2_SADDR_RTN killed renamable $vgpr0, killed renamable $vgpr1_vgpr2, killed renamable $sgpr0_sgpr1, 40, 1, implicit $exec + S_ENDPGM 0 +... + +--- +name: image_atomic +body: | + bb.0: + ; CHECK-LABEL: name: image_atomic + ; CHECK: S_WAITCNT 0 + ; CHECK-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) + ; CHECK-NEXT: S_ENDPGM 0 + renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) + S_ENDPGM 0 +...