Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3113,7 +3113,14 @@ // // However, executing them with EXEC = 0 causes them to operate on undefined // data, which we avoid by returning true here. - if (Opcode == AMDGPU::V_READFIRSTLANE_B32 || Opcode == AMDGPU::V_READLANE_B32) + if (Opcode == AMDGPU::V_READFIRSTLANE_B32 || + Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 || + Opcode == AMDGPU::V_READLANE_B32_vi || + Opcode == AMDGPU::V_READLANE_B32_gfx6_gfx7 || + Opcode == AMDGPU::V_READLANE_B32_gfx10 || + Opcode == AMDGPU::V_WRITELANE_B32_vi || + Opcode == AMDGPU::V_WRITELANE_B32_gfx6_gfx7 || + Opcode == AMDGPU::V_WRITELANE_B32_gfx10) return true; return false; Index: llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-mode-def.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-mode-def.mir +++ /dev/null @@ -1,111 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-remove-short-exec-branches -amdgpu-skip-threshold=10 -verify-machineinstrs %s -o - | FileCheck %s -# Make sure mandatory skips are not removed around mode defs. -# FIXME: -amdgpu-skip-threshold seems to be backwards. 
- ---- - -name: need_skip_setreg_imm32_b32 -body: | - ; CHECK-LABEL: name: need_skip_setreg_imm32_b32 - ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; CHECK: S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode - ; CHECK: bb.2: - ; CHECK: S_ENDPGM 0 - bb.0: - successors: %bb.1, %bb.2 - S_CBRANCH_EXECZ %bb.2, implicit $exec - - bb.1: - successors: %bb.2 - $vgpr0 = V_MOV_B32_e32 0, implicit $exec - S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode - - bb.2: - S_ENDPGM 0 -... - ---- - -name: need_skip_setreg_b32 -body: | - ; CHECK-LABEL: name: need_skip_setreg_b32 - ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; CHECK: S_SETREG_B32 $sgpr0, 3, implicit-def $mode, implicit $mode - ; CHECK: bb.2: - ; CHECK: S_ENDPGM 0 - bb.0: - liveins: $sgpr0 - successors: %bb.1, %bb.2 - S_CBRANCH_EXECZ %bb.2, implicit $exec - - bb.1: - liveins: $sgpr0 - $vgpr0 = V_MOV_B32_e32 0, implicit $exec - S_SETREG_B32 $sgpr0, 3, implicit-def $mode, implicit $mode - - bb.2: - S_ENDPGM 0 -... 
- ---- - -name: need_skip_denorm_mode -body: | - ; CHECK-LABEL: name: need_skip_denorm_mode - ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; CHECK: S_DENORM_MODE 3, implicit-def $mode, implicit $mode - ; CHECK: bb.2: - ; CHECK: S_ENDPGM 0 - bb.0: - successors: %bb.1, %bb.2 - S_CBRANCH_EXECZ %bb.2, implicit $exec - - bb.1: - $vgpr0 = V_MOV_B32_e32 0, implicit $exec - S_DENORM_MODE 3, implicit-def $mode, implicit $mode - - bb.2: - S_ENDPGM 0 -... - ---- - -name: need_skip_round_mode -body: | - ; CHECK-LABEL: name: need_skip_round_mode - ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; CHECK: S_ROUND_MODE 3, implicit-def $mode, implicit $mode - ; CHECK: bb.2: - ; CHECK: S_ENDPGM 0 - bb.0: - successors: %bb.1, %bb.2 - S_CBRANCH_EXECZ %bb.2, implicit $exec - - bb.1: - $vgpr0 = V_MOV_B32_e32 0, implicit $exec - S_ROUND_MODE 3, implicit-def $mode, implicit $mode - - bb.2: - S_ENDPGM 0 -... Index: llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir @@ -0,0 +1,318 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-remove-short-exec-branches -amdgpu-skip-threshold=10 -verify-machineinstrs %s -o - | FileCheck %s +# Make sure mandatory skips are not removed around mode defs or around V_READLANE_B32/V_WRITELANE_B32 instructions. +# FIXME: -amdgpu-skip-threshold seems to be backwards. 
+ +--- + +name: need_skip_setreg_imm32_b32 +body: | + ; CHECK-LABEL: name: need_skip_setreg_imm32_b32 + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode + ; CHECK: bb.2: + ; CHECK: S_ENDPGM 0 + bb.0: + successors: %bb.1, %bb.2 + S_CBRANCH_EXECZ %bb.2, implicit $exec + + bb.1: + successors: %bb.2 + $vgpr0 = V_MOV_B32_e32 0, implicit $exec + S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode + + bb.2: + S_ENDPGM 0 +... + +--- + +name: need_skip_setreg_b32 +body: | + ; CHECK-LABEL: name: need_skip_setreg_b32 + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: S_SETREG_B32 $sgpr0, 3, implicit-def $mode, implicit $mode + ; CHECK: bb.2: + ; CHECK: S_ENDPGM 0 + bb.0: + liveins: $sgpr0 + successors: %bb.1, %bb.2 + S_CBRANCH_EXECZ %bb.2, implicit $exec + + bb.1: + liveins: $sgpr0 + $vgpr0 = V_MOV_B32_e32 0, implicit $exec + S_SETREG_B32 $sgpr0, 3, implicit-def $mode, implicit $mode + + bb.2: + S_ENDPGM 0 +... 
+ +--- + +name: need_skip_denorm_mode +body: | + ; CHECK-LABEL: name: need_skip_denorm_mode + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: S_DENORM_MODE 3, implicit-def $mode, implicit $mode + ; CHECK: bb.2: + ; CHECK: S_ENDPGM 0 + bb.0: + successors: %bb.1, %bb.2 + S_CBRANCH_EXECZ %bb.2, implicit $exec + + bb.1: + $vgpr0 = V_MOV_B32_e32 0, implicit $exec + S_DENORM_MODE 3, implicit-def $mode, implicit $mode + + bb.2: + S_ENDPGM 0 +... + +--- + +name: need_skip_round_mode +body: | + ; CHECK-LABEL: name: need_skip_round_mode + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: S_ROUND_MODE 3, implicit-def $mode, implicit $mode + ; CHECK: bb.2: + ; CHECK: S_ENDPGM 0 + bb.0: + successors: %bb.1, %bb.2 + S_CBRANCH_EXECZ %bb.2, implicit $exec + + bb.1: + $vgpr0 = V_MOV_B32_e32 0, implicit $exec + S_ROUND_MODE 3, implicit-def $mode, implicit $mode + + bb.2: + S_ENDPGM 0 +... + +--- + +name: need_skip_writelane_b32 +body: | + ; CHECK-LABEL: name: need_skip_writelane_b32 + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: $sgpr0 = IMPLICIT_DEF + ; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, $vgpr0 + ; CHECK: bb.2: + ; CHECK: S_ENDPGM 0 + bb.0: + successors: %bb.1, %bb.2 + S_CBRANCH_EXECZ %bb.2, implicit $exec + + bb.1: + successors: %bb.2 + $sgpr0 = IMPLICIT_DEF + $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, $vgpr0 + + bb.2: + S_ENDPGM 0 +... 
+ +--- + +name: need_skip_writelane_vi_b32 +body: | + ; CHECK-LABEL: name: need_skip_writelane_vi_b32 + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: $sgpr0 = IMPLICIT_DEF + ; CHECK: $vgpr0 = V_WRITELANE_B32_vi $sgpr0, 0, $vgpr0 + ; CHECK: bb.2: + ; CHECK: S_ENDPGM 0 + bb.0: + successors: %bb.1, %bb.2 + S_CBRANCH_EXECZ %bb.2, implicit $exec + + bb.1: + successors: %bb.2 + $sgpr0 = IMPLICIT_DEF + $vgpr0 = V_WRITELANE_B32_vi $sgpr0, 0, $vgpr0 + + bb.2: + S_ENDPGM 0 +... + +--- +name: need_skip_writelane_gfx6_gfx7_b32 +body: | + ; CHECK-LABEL: name: need_skip_writelane_gfx6_gfx7_b32 + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: $sgpr0 = IMPLICIT_DEF + ; CHECK: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr0, 0, $vgpr0 + ; CHECK: bb.2: + ; CHECK: S_ENDPGM 0 + bb.0: + successors: %bb.1, %bb.2 + S_CBRANCH_EXECZ %bb.2, implicit $exec + + bb.1: + successors: %bb.2 + $sgpr0 = IMPLICIT_DEF + $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr0, 0, $vgpr0 + + bb.2: + S_ENDPGM 0 +... +--- +name: need_skip_writelane_gfx10_b32 +body: | + ; CHECK-LABEL: name: need_skip_writelane_gfx10_b32 + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: $sgpr0 = IMPLICIT_DEF + ; CHECK: $vgpr0 = V_WRITELANE_B32_gfx10 $sgpr0, 0, $vgpr0 + ; CHECK: bb.2: + ; CHECK: S_ENDPGM 0 + bb.0: + successors: %bb.1, %bb.2 + S_CBRANCH_EXECZ %bb.2, implicit $exec + + bb.1: + successors: %bb.2 + $sgpr0 = IMPLICIT_DEF + $vgpr0 = V_WRITELANE_B32_gfx10 $sgpr0, 0, $vgpr0 + + bb.2: + S_ENDPGM 0 +... 
+ +--- +name: need_skip_readlane_b32 +body: | + ; CHECK-LABEL: name: need_skip_readlane_b32 + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: $vgpr0 = IMPLICIT_DEF + ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0 + ; CHECK: bb.2: + ; CHECK: S_ENDPGM 0 + bb.0: + successors: %bb.1, %bb.2 + S_CBRANCH_EXECZ %bb.2, implicit $exec + + bb.1: + successors: %bb.2 + $vgpr0 = IMPLICIT_DEF + $sgpr0 = V_READLANE_B32 $vgpr0, 0 + + bb.2: + S_ENDPGM 0 +... + +--- +name: need_skip_readlane_gfx6_gfx7_b32 +body: | + ; CHECK-LABEL: name: need_skip_readlane_gfx6_gfx7_b32 + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: $vgpr0 = IMPLICIT_DEF + ; CHECK: $sgpr0 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 0 + ; CHECK: bb.2: + ; CHECK: S_ENDPGM 0 + bb.0: + successors: %bb.1, %bb.2 + S_CBRANCH_EXECZ %bb.2, implicit $exec + + bb.1: + successors: %bb.2 + $vgpr0 = IMPLICIT_DEF + $sgpr0 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 0 + + bb.2: + S_ENDPGM 0 +... +--- +name: need_skip_readlane_vi_b32 +body: | + ; CHECK-LABEL: name: need_skip_readlane_vi_b32 + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: $vgpr0 = IMPLICIT_DEF + ; CHECK: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0 + ; CHECK: bb.2: + ; CHECK: S_ENDPGM 0 + bb.0: + successors: %bb.1, %bb.2 + S_CBRANCH_EXECZ %bb.2, implicit $exec + + bb.1: + successors: %bb.2 + $vgpr0 = IMPLICIT_DEF + $sgpr0 = V_READLANE_B32_vi $vgpr0, 0 + + bb.2: + S_ENDPGM 0 +... 
+--- +name: need_skip_readlane_gfx10_b32 +body: | + ; CHECK-LABEL: name: need_skip_readlane_gfx10_b32 + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: $vgpr0 = IMPLICIT_DEF + ; CHECK: $sgpr0 = V_READLANE_B32_gfx10 $vgpr0, 0 + ; CHECK: bb.2: + ; CHECK: S_ENDPGM 0 + bb.0: + successors: %bb.1, %bb.2 + S_CBRANCH_EXECZ %bb.2, implicit $exec + + bb.1: + successors: %bb.2 + $vgpr0 = IMPLICIT_DEF + $sgpr0 = V_READLANE_B32_gfx10 $vgpr0, 0 + + bb.2: + S_ENDPGM 0 +...