Add V_WRITELANE_B32 to the opcodes for which the EXEC = 0 check in
SIInstrInfo.cpp returns true, next to V_READFIRSTLANE_B32 and V_READLANE_B32.

Add two MIR tests, need_skip_writelane_b32 and need_skip_readlane_b32, whose
CHECK lines expect the S_CBRANCH_EXECZ over the lane instruction to be kept.

Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3108,7 +3108,8 @@
   //
   // However, executing them with EXEC = 0 causes them to operate on undefined
   // data, which we avoid by returning true here.
-  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 || Opcode == AMDGPU::V_READLANE_B32)
+  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
+      Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32)
     return true;
 
   return false;
Index: llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir
+++ llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir
@@ -109,3 +109,56 @@
   bb.2:
     S_ENDPGM 0
 ...
+
+---
+
+name: need_skip_writelane_b32
+body: |
+  ; CHECK-LABEL: name: need_skip_writelane_b32
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   $sgpr0 = IMPLICIT_DEF
+  ; CHECK:   $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, $vgpr0
+  ; CHECK: bb.2:
+  ; CHECK:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    $sgpr0 = IMPLICIT_DEF
+    $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, $vgpr0
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+name: need_skip_readlane_b32
+body: |
+  ; CHECK-LABEL: name: need_skip_readlane_b32
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   $vgpr0 = IMPLICIT_DEF
+  ; CHECK:   $sgpr0 = V_READLANE_B32 $vgpr0, 0
+  ; CHECK: bb.2:
+  ; CHECK:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    $vgpr0 = IMPLICIT_DEF
+    $sgpr0 = V_READLANE_B32 $vgpr0, 0
+
+  bb.2:
+    S_ENDPGM 0
+...