diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -130,10 +130,37 @@
   return false;
 }
 
+/// Returns true if \p MI must be treated as really reading exec, so that its
+/// implicit exec operand is not an ignorable use. This holds for every
+/// instruction flagged as a compare, for V_READFIRSTLANE_B32 and for the
+/// V_CNDMASK forms listed below. The machinelicm test licm-valu.mir checks
+/// that such instructions stay inside a loop that writes exec.
+static bool readsExecAsData(const MachineInstr &MI) {
+  // Every instruction flagged as a compare counts, whatever its opcode.
+  if (MI.isCompare())
+    return true;
+
+  // Other opcodes are listed explicitly; anything not listed returns false.
+  switch (MI.getOpcode()) {
+  default:
+    break;
+  case AMDGPU::V_READFIRSTLANE_B32:
+  case AMDGPU::V_CNDMASK_B64_PSEUDO:
+  case AMDGPU::V_CNDMASK_B32_dpp:
+  case AMDGPU::V_CNDMASK_B32_e32:
+  case AMDGPU::V_CNDMASK_B32_e64:
+  case AMDGPU::V_CNDMASK_B32_sdwa:
+    return true;
+  }
+
+  return false;
+}
+
 bool SIInstrInfo::isIgnorableUse(const MachineOperand &MO) const {
-  // Any implicit use of exec by VALU is not a real register read.
+  // An implicit use of exec by a VALU instruction is not a real register read,
+  // unless readsExecAsData() says the instruction must keep its exec read.
   return MO.getReg() == AMDGPU::EXEC && MO.isImplicit() &&
-         isVALU(*MO.getParent());
+         isVALU(*MO.getParent()) && !readsExecAsData(*MO.getParent());
 }
 
 bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
diff --git a/llvm/test/CodeGen/AMDGPU/licm-valu.mir b/llvm/test/CodeGen/AMDGPU/licm-valu.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/licm-valu.mir
@@ -0,0 +1,245 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machinelicm -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+
+---
+name: hoist_move
+tracksRegLiveness: true
+body: |
+  ; GCN-LABEL: name: hoist_move
+  ; GCN: bb.0:
+  ; GCN-NEXT: successors: %bb.1(0x80000000)
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+  ; GCN-NEXT: S_BRANCH %bb.1
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: $exec = S_OR_B64 $exec, 1, implicit-def $scc
+  ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; GCN-NEXT: S_BRANCH %bb.2
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT: S_ENDPGM 0
+  bb.0:
+    S_BRANCH %bb.1
+
+  bb.1:
+    %0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; expected to be hoisted into bb.0 although the loop writes $exec
+    $exec = S_OR_B64 $exec, 1, implicit-def $scc
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name: no_hoist_cmp
+tracksRegLiveness: true
+body: |
+  ; GCN-LABEL: name: no_hoist_cmp
+  ; GCN: bb.0:
+  ; GCN-NEXT: successors: %bb.1(0x80000000)
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: S_BRANCH %bb.1
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 1, 2, implicit $exec
+  ; GCN-NEXT: $exec = S_OR_B64 $exec, 1, implicit-def $scc
+  ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; GCN-NEXT: S_BRANCH %bb.2
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT: S_ENDPGM 0
+  bb.0:
+    S_BRANCH %bb.1
+
+  bb.1:
+    %0:sreg_64 = V_CMP_EQ_U32_e64 1, 2, implicit $exec ; a compare: expected to stay in the loop, which writes $exec
+    $exec = S_OR_B64 $exec, 1, implicit-def $scc
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name: no_hoist_readfirstlane
+tracksRegLiveness: true
+body: |
+  ; GCN-LABEL: name: no_hoist_readfirstlane
+  ; GCN: bb.0:
+  ; GCN-NEXT: successors: %bb.1(0x80000000)
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GCN-NEXT: S_BRANCH %bb.1
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[DEF]], implicit $exec
+  ; GCN-NEXT: $exec = S_OR_B64 $exec, 1, implicit-def $scc
+  ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; GCN-NEXT: S_BRANCH %bb.2
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT: S_ENDPGM 0
+  bb.0:
+    %0:vgpr_32 = IMPLICIT_DEF
+    S_BRANCH %bb.1
+
+  bb.1:
+    %1:sgpr_32 = V_READFIRSTLANE_B32 %0:vgpr_32, implicit $exec ; expected to stay in the loop, which writes $exec
+    $exec = S_OR_B64 $exec, 1, implicit-def $scc
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name: no_hoist_cndmask_e64
+tracksRegLiveness: true
+body: |
+  ; GCN-LABEL: name: no_hoist_cndmask_e64
+  ; GCN: bb.0:
+  ; GCN-NEXT: successors: %bb.1(0x80000000)
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; GCN-NEXT: S_BRANCH %bb.1
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[DEF]], 0, [[DEF]], [[DEF1]], implicit $exec
+  ; GCN-NEXT: $exec = S_OR_B64 $exec, 1, implicit-def $scc
+  ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; GCN-NEXT: S_BRANCH %bb.2
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT: S_ENDPGM 0
+  bb.0:
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:sreg_64_xexec = IMPLICIT_DEF
+    S_BRANCH %bb.1
+
+  bb.1:
+    %2:vgpr_32 = V_CNDMASK_B32_e64 0, %0, 0, %0, %1, implicit $exec ; expected to stay in the loop, which writes $exec
+    $exec = S_OR_B64 $exec, 1, implicit-def $scc
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name: no_hoist_cndmask_e32
+tracksRegLiveness: true
+body: |
+  ; GCN-LABEL: name: no_hoist_cndmask_e32
+  ; GCN: bb.0:
+  ; GCN-NEXT: successors: %bb.1(0x80000000)
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; GCN-NEXT: S_BRANCH %bb.1
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: [[V_CNDMASK_B32_e32_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e32 [[DEF]], [[DEF]], implicit undef $vcc, implicit $exec
+  ; GCN-NEXT: $exec = S_OR_B64 $exec, 1, implicit-def $scc
+  ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; GCN-NEXT: S_BRANCH %bb.2
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT: S_ENDPGM 0
+  bb.0:
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:sreg_64_xexec = IMPLICIT_DEF ; NOTE(review): %1 is not used by any instruction in this function
+    S_BRANCH %bb.1
+
+  bb.1:
+    %2:vgpr_32 = V_CNDMASK_B32_e32 %0, %0, implicit undef $vcc, implicit $exec ; expected to stay in the loop, which writes $exec
+    $exec = S_OR_B64 $exec, 1, implicit-def $scc
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name: no_hoist_cndmask_dpp
+tracksRegLiveness: true
+body: |
+  ; GCN-LABEL: name: no_hoist_cndmask_dpp
+  ; GCN: bb.0:
+  ; GCN-NEXT: successors: %bb.1(0x80000000)
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; GCN-NEXT: S_BRANCH %bb.1
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: [[V_CNDMASK_B32_dpp:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_dpp [[DEF]], 0, [[DEF]], 0, [[DEF]], 1, 15, 15, 10, implicit $exec, implicit undef $vcc
+  ; GCN-NEXT: $exec = S_OR_B64 $exec, 1, implicit-def $scc
+  ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; GCN-NEXT: S_BRANCH %bb.2
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT: S_ENDPGM 0
+  bb.0:
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:sreg_64_xexec = IMPLICIT_DEF ; NOTE(review): %1 is not used by any instruction in this function
+    S_BRANCH %bb.1
+
+  bb.1:
+    %2:vgpr_32 = V_CNDMASK_B32_dpp %0:vgpr_32, 0, %0:vgpr_32, 0, %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit undef $vcc ; expected to stay in the loop, which writes $exec
+    $exec = S_OR_B64 $exec, 1, implicit-def $scc
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name: no_hoist_cndmask_sdwa
+tracksRegLiveness: true
+body: |
+  ; GCN-LABEL: name: no_hoist_cndmask_sdwa
+  ; GCN: bb.0:
+  ; GCN-NEXT: successors: %bb.1(0x80000000)
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; GCN-NEXT: S_BRANCH %bb.1
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: [[V_CNDMASK_B32_sdwa:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_sdwa 0, [[DEF]], 0, [[DEF]], 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
+  ; GCN-NEXT: $exec = S_OR_B64 $exec, 1, implicit-def $scc
+  ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; GCN-NEXT: S_BRANCH %bb.2
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT: S_ENDPGM 0
+  bb.0:
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:sreg_64_xexec = IMPLICIT_DEF ; NOTE(review): %1 is not used by any instruction in this function
+    S_BRANCH %bb.1
+
+  bb.1:
+    %2:vgpr_32 = V_CNDMASK_B32_sdwa 0, %0:vgpr_32, 0, %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc ; expected to stay in the loop, which writes $exec
+    $exec = S_OR_B64 $exec, 1, implicit-def $scc
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll --- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll +++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll @@ -54,17 +54,17 @@ ; GFX9-LABEL: lsr_order_mul24_1: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1 -; GFX9-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GFX9-NEXT: v_and_b32_e32 v5, 1, v18 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v5 +; GFX9-NEXT: v_cmp_lt_u32_e64 s[4:5], v0, v1 +; GFX9-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] ; GFX9-NEXT: s_cbranch_execz .LBB1_3 ; GFX9-NEXT: ; %bb.1: ; %bb19 ; GFX9-NEXT: v_cvt_f32_u32_e32 v7, v6 -; GFX9-NEXT: v_and_b32_e32 v5, 0xffffff, v6 -; GFX9-NEXT: v_and_b32_e32 v8, 1, v18 ; GFX9-NEXT: v_add_u32_e32 v4, v4, v0 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v6, v7 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 -; GFX9-NEXT: v_lshl_add_u32 v7, v4, 2, v3 +; GFX9-NEXT: v_and_b32_e32 v5, 0xffffff, v6 +; GFX9-NEXT: v_lshl_add_u32 v6, v4, 2, v3 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v7, v7 ; GFX9-NEXT: v_lshlrev_b32_e32 v8, 2, v2 ; GFX9-NEXT: v_add_u32_e32 v9, v17, v12 ; GFX9-NEXT: s_mov_b64 s[10:11], 0 @@ -76,7 +76,7 @@ ; GFX9-NEXT: v_add_u32_e32 v12, v17, v0 ; GFX9-NEXT: v_add_u32_e32 v19, v9, v0 ; GFX9-NEXT: v_add_u32_e32 v0, v0, v2 -; GFX9-NEXT: v_madak_f32 v3, v3, v6, 0x3727c5ac +; GFX9-NEXT: v_madak_f32 v3, v3, v7, 0x3727c5ac ; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3 ; GFX9-NEXT: v_mul_u32_u24_e32 v18, v3, v5 ; GFX9-NEXT: v_add_u32_e32 v3, v3, v16 @@ -97,8 +97,8 @@ ; GFX9-NEXT: s_or_b64 s[10:11], s[6:7], s[10:11] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[4:5] -; GFX9-NEXT: ds_write_b32 v7, v3 -; GFX9-NEXT: v_add_u32_e32 v7, v7, v8 +; GFX9-NEXT: ds_write_b32 v6, v3 +; GFX9-NEXT: v_add_u32_e32 v6, v6, v8 ; GFX9-NEXT: s_andn2_b64 exec, exec, s[10:11] ; GFX9-NEXT: s_cbranch_execnz .LBB1_2 ; GFX9-NEXT: .LBB1_3: ; %Flow3