diff --git a/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp b/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
--- a/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
@@ -100,7 +100,7 @@
 
       // These instructions are potentially expensive even if EXEC = 0.
       if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) ||
-          I->getOpcode() == AMDGPU::S_WAITCNT)
+          TII->isDS(*I) || I->getOpcode() == AMDGPU::S_WAITCNT)
         return true;
 
       ++NumInstr;
diff --git a/llvm/test/CodeGen/AMDGPU/branch-condition-and.ll b/llvm/test/CodeGen/AMDGPU/branch-condition-and.ll
--- a/llvm/test/CodeGen/AMDGPU/branch-condition-and.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-condition-and.ll
@@ -14,11 +14,12 @@
 ; GCN-DAG: v_cmp_lt_f32_e32 vcc,
 ; GCN: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[OTHERCC]]
 ; GCN: s_and_saveexec_b64 [[SAVED:s\[[0-9]+:[0-9]+\]]], [[AND]]
+; GCN-NEXT: s_cbranch_execz BB0_{{[0-9]+}}
 ; GCN-NEXT: ; %bb.{{[0-9]+}}: ; %bb4
 ; GCN: ds_write_b32
 
-; GCN: ; %bb.{{[0-9]+}}:
+; GCN: BB0_{{[0-9]+}}: ; %UnifiedReturnBlock
 ; GCN-NEXT: s_endpgm
 ; GCN-NEXT: .Lfunc_end
 define amdgpu_ps void @ham(float %arg, float %arg1) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem-ds.mir
rename from llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem.mir
rename to llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem-ds.mir
--- a/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem-ds.mir
@@ -56,3 +56,31 @@
   bb.2:
     S_ENDPGM 0
 ...
+
+---
+
+name: skip_execz_ds
+body: |
+  ; CHECK-LABEL: name: skip_execz_ds
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   SI_MASK_BRANCH %bb.2, implicit $exec
+  ; CHECK:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK:   DS_WRITE_B32 $vgpr0, $vgpr0, 0, 0, implicit $m0, implicit $exec
+  ; CHECK: bb.2:
+  ; CHECK:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    SI_MASK_BRANCH %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    DS_WRITE_B32 $vgpr0, $vgpr0, 0, 0, implicit $m0, implicit $exec
+
+  bb.2:
+    S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/ret_jump.ll b/llvm/test/CodeGen/AMDGPU/ret_jump.ll
--- a/llvm/test/CodeGen/AMDGPU/ret_jump.ll
+++ b/llvm/test/CodeGen/AMDGPU/ret_jump.ll
@@ -65,6 +65,7 @@
 
 ; GCN: BB{{[0-9]+_[0-9]+}}: ; %else
 ; GCN: s_and_saveexec_b64 [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc
+; GCN-NEXT: s_cbranch_execz BB1_{{[0-9]+}}
 ; GCN-NEXT: ; %unreachable.bb
 ; GCN: ds_write_b32
 
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
--- a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
@@ -3,12 +3,13 @@
 ; GCN-LABEL: {{^}}lower_control_flow_unreachable_terminator:
 ; GCN: v_cmp_eq_u32
 ; GCN: s_and_saveexec_b64
+; GCN-NEXT: s_cbranch_execz BB0_{{[0-9]+}}
 ; GCN-NEXT: ; %bb.{{[0-9]+}}: ; %unreachable
 ; GCN: ds_write_b32
 
 ; GCN: ; divergent unreachable
 
-; GCN-NEXT: ; %bb.{{[0-9]+}}: ; %UnifiedReturnBlock
+; GCN-NEXT: BB0_{{[0-9]+}}: ; %UnifiedReturnBlock
 ; GCN: s_endpgm
 
 define amdgpu_kernel void @lower_control_flow_unreachable_terminator() #0 {
@@ -28,12 +29,13 @@
 ; GCN-LABEL: {{^}}lower_control_flow_unreachable_terminator_swap_block_order:
 ; GCN: v_cmp_ne_u32
 ; GCN: s_and_saveexec_b64
+; GCN-NEXT: s_cbranch_execz BB1_{{[0-9]+}}
 ; GCN-NEXT: ; %bb.{{[0-9]+}}: ; %unreachable
 ; GCN: ds_write_b32
 
 ; GCN: ; divergent unreachable
 
-; GCN: ; %bb.{{[0-9]+}}:
+; GCN: BB1_{{[0-9]+}}:
 ; GCN-NEXT: s_endpgm
 define amdgpu_kernel void @lower_control_flow_unreachable_terminator_swap_block_order() #0 {
 bb: