Index: lib/Target/AMDGPU/SIInsertSkips.cpp
===================================================================
--- lib/Target/AMDGPU/SIInsertSkips.cpp
+++ lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -109,9 +109,6 @@

 bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
                                const MachineBasicBlock &To) const {
-  if (From.succ_empty())
-    return false;
-
   unsigned NumInstr = 0;
   const MachineFunction *MF = From.getParent();

Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2479,6 +2479,10 @@
   if (MI.mayStore() && isSMRD(MI))
     return true; // scalar store or atomic

+  // This will terminate the function when other lanes may need to continue.
+  if (MI.isReturn())
+    return true;
+
   // These instructions cause shader I/O that may cause hardware lockups
   // when executed with an empty EXEC mask.
   //
Index: test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir
@@ -0,0 +1,194 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-insert-skips -amdgpu-skip-threshold=1000000 -o - %s | FileCheck %s
+
+---
+name: skip_branch_taildup_endpgm
+machineFunctionInfo:
+  isEntryFunction: true
+body: |
+  ; CHECK-LABEL: name: skip_branch_taildup_endpgm
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.3(0x40000000), %bb.1(0x40000000)
+  ; CHECK: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 4, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
+  ; CHECK: renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec
+  ; CHECK: S_WAITCNT 127
+  ; CHECK: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $exec
+  ; CHECK: renamable $vgpr0 = V_ADD_I32_e32 $sgpr0, killed $vgpr0, implicit-def $vcc, implicit $exec
+  ; CHECK: renamable $vgpr1 = V_ADDC_U32_e32 0, killed $vgpr1, implicit-def $vcc, implicit killed $vcc, implicit $exec
+  ; CHECK: renamable $vgpr0 = FLAT_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
+  ; CHECK: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
+  ; CHECK: S_WAITCNT 112
+  ; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+  ; CHECK: $sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; CHECK: renamable $sgpr2_sgpr3 = S_XOR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def dead $scc
+  ; CHECK: SI_MASK_BRANCH %bb.1, implicit $exec
+  ; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec
+  ; CHECK: S_BRANCH %bb.3
+  ; CHECK: bb.1:
+  ; CHECK: successors: %bb.4(0x40000000), %bb.2(0x40000000)
+  ; CHECK: renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
+  ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
+  ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK: S_BRANCH %bb.4
+  ; CHECK: bb.2:
+  ; CHECK: $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
+  ; CHECK: renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
+  ; CHECK: S_ENDPGM 0
+  ; CHECK: bb.3:
+  ; CHECK: successors: %bb.4(0x40000000), %bb.2(0x40000000)
+  ; CHECK: renamable $vgpr2 = V_MOV_B32_e32 15, implicit $exec
+  ; CHECK: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+  ; CHECK: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+  ; CHECK: renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
+  ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
+  ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK: bb.4:
+  ; CHECK: renamable $vgpr2 = V_MOV_B32_e32 8, implicit $exec
+  ; CHECK: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+  ; CHECK: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1, implicit $exec
+  ; CHECK: $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
+  ; CHECK: renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
+  ; CHECK: S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $vgpr0, $sgpr4_sgpr5, $sgpr7
+
+    renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 4, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
+    renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec
+    S_WAITCNT 127
+    $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $exec
+    renamable $vgpr0 = V_ADD_I32_e32 $sgpr0, killed $vgpr0, implicit-def $vcc, implicit $exec
+    renamable $vgpr1 = V_ADDC_U32_e32 0, killed $vgpr1, implicit-def $vcc, implicit killed $vcc, implicit $exec
+    renamable $vgpr0 = FLAT_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
+    renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
+    S_WAITCNT 112
+    V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+    $sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
+    renamable $sgpr2_sgpr3 = S_XOR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def dead $scc
+    SI_MASK_BRANCH %bb.2, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.2:
+    successors: %bb.3, %bb.4
+    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+
+    renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
+    $exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
+    SI_MASK_BRANCH %bb.4, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.4:
+    liveins: $sgpr2_sgpr3
+
+    $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
+    renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
+    S_ENDPGM 0
+
+  bb.1:
+    successors: %bb.3, %bb.4
+    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+
+    renamable $vgpr2 = V_MOV_B32_e32 15, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+    $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+    renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
+    $exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
+    SI_MASK_BRANCH %bb.4, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.3:
+    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+
+    renamable $vgpr2 = V_MOV_B32_e32 8, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+    $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1, implicit $exec
+    $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
+    renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
+    S_ENDPGM 0
+
+...
+
+---
+name: skip_branch_taildup_ret
+body: |
+  ; CHECK-LABEL: name: skip_branch_taildup_ret
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.3(0x40000000), %bb.1(0x40000000)
+  ; CHECK: S_WAITCNT 0
+  ; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+  ; CHECK: $sgpr6_sgpr7 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; CHECK: renamable $sgpr6_sgpr7 = S_XOR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def dead $scc
+  ; CHECK: SI_MASK_BRANCH %bb.1, implicit $exec
+  ; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec
+  ; CHECK: S_BRANCH %bb.3
+  ; CHECK: bb.1:
+  ; CHECK: successors: %bb.4(0x40000000), %bb.2(0x40000000)
+  ; CHECK: renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
+  ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
+  ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK: S_BRANCH %bb.4
+  ; CHECK: bb.2:
+  ; CHECK: $exec = S_OR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc
+  ; CHECK: renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
+  ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31
+  ; CHECK: bb.3:
+  ; CHECK: successors: %bb.4(0x40000000), %bb.2(0x40000000)
+  ; CHECK: renamable $vgpr0 = V_MOV_B32_e32 15, implicit $exec
+  ; CHECK: renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
+  ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
+  ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK: bb.4:
+  ; CHECK: renamable $vgpr0 = V_MOV_B32_e32 8, implicit $exec
+  ; CHECK: $exec = S_OR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc
+  ; CHECK: renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
+  ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $vgpr0, $sgpr30_sgpr31, $vgpr1_vgpr2
+
+    S_WAITCNT 0
+    V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+    $sgpr6_sgpr7 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
+    renamable $sgpr6_sgpr7 = S_XOR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def dead $scc
+    SI_MASK_BRANCH %bb.2, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.2:
+    successors: %bb.3, %bb.4
+    liveins: $sgpr6_sgpr7, $sgpr30_sgpr31, $vgpr1_vgpr2
+
+    renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
+    $exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
+    SI_MASK_BRANCH %bb.4, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.4:
+    liveins: $sgpr6_sgpr7, $sgpr30_sgpr31
+
+    $exec = S_OR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc
+    renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
+    S_SETPC_B64_return $sgpr30_sgpr31
+
+  bb.1:
+    successors: %bb.3, %bb.4
+    liveins: $sgpr6_sgpr7, $sgpr30_sgpr31, $vgpr1_vgpr2
+
+    renamable $vgpr0 = V_MOV_B32_e32 15, implicit $exec
+    renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
+    $exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
+    SI_MASK_BRANCH %bb.4, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.3:
+    liveins: $sgpr6_sgpr7, $sgpr30_sgpr31, $vgpr1_vgpr2
+
+    renamable $vgpr0 = V_MOV_B32_e32 8, implicit $exec
+    $exec = S_OR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc
+    renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
+    S_SETPC_B64_return $sgpr30_sgpr31
+
+...