diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -7599,8 +7599,18 @@
 }
 
 bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const {
-  return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
-         MI.modifiesRegister(AMDGPU::EXEC, &RI);
+  // Opcodes that are never treated as prologue instructions, whether or not
+  // they modify EXEC: plain copies and the control flow pseudos.
+  const auto IsNeverPrologue = [](unsigned Opcode) {
+    return Opcode == AMDGPU::COPY || Opcode == AMDGPU::SI_IF ||
+           Opcode == AMDGPU::SI_IF_BREAK || Opcode == AMDGPU::SI_LOOP ||
+           Opcode == AMDGPU::SI_ELSE || Opcode == AMDGPU::SI_END_CF;
+  };
+
+  // Of the remaining non-terminators, only those that modify EXEC belong to
+  // the prologue.
+  return !MI.isTerminator() && !IsNeverPrologue(MI.getOpcode()) &&
+         MI.modifiesRegister(AMDGPU::EXEC, &RI);
 }
 
 MachineInstrBuilder
diff --git a/llvm/test/CodeGen/AMDGPU/sink-after-control-flow.mir b/llvm/test/CodeGen/AMDGPU/sink-after-control-flow.mir
--- a/llvm/test/CodeGen/AMDGPU/sink-after-control-flow.mir
+++ b/llvm/test/CodeGen/AMDGPU/sink-after-control-flow.mir
@@ -3,6 +3,7 @@
 
 # Test that MachineSink pass respects block prologues when sinking instructions.
 # Specifically an instruction must not be sunk before exec mask manipulation.
+# With the exception that control flow pseudo instructions are not part of the prologue.
 
 ---
 name: _amdgpu_hs_main
@@ -120,3 +121,133 @@
     S_ENDPGM 0
 
 ...
+---
+name: test_sink_after_end_cf
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+body: |
+  ; GFX10-LABEL: name: test_sink_after_end_cf
+  ; GFX10: bb.0.entry:
+  ; GFX10-NEXT: successors: %bb.1(0x80000000)
+  ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vcc
+  ; GFX10-NEXT: {{ $}}
+  ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+  ; GFX10-NEXT: {{ $}}
+  ; GFX10-NEXT: bb.1:
+  ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vcc
+  ; GFX10-NEXT: {{ $}}
+  ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+  ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B32_]]
+  ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY3]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
+  ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY2]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
+  ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+  ; GFX10-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY4]](s32), [[S_MOV_B32_1]], implicit $exec
+  ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[V_CMP_LT_I32_e64_]], %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; GFX10-NEXT: S_BRANCH %bb.2
+  ; GFX10-NEXT: {{ $}}
+  ; GFX10-NEXT: bb.2:
+  ; GFX10-NEXT: successors: %bb.3(0x80000000)
+  ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vcc
+  ; GFX10-NEXT: {{ $}}
+  ; GFX10-NEXT: S_NOP 0
+  ; GFX10-NEXT: {{ $}}
+  ; GFX10-NEXT: bb.3:
+  ; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.6(0x40000000)
+  ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vcc
+  ; GFX10-NEXT: {{ $}}
+  ; GFX10-NEXT: %6:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+  ; GFX10-NEXT: %8:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD1]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+  ; GFX10-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; GFX10-NEXT: S_NOP 0, implicit %6, implicit %8
+  ; GFX10-NEXT: S_CBRANCH_EXECZ %bb.6, implicit $exec
+  ; GFX10-NEXT: {{ $}}
+  ; GFX10-NEXT: bb.4:
+  ; GFX10-NEXT: successors: %bb.5(0x04000000), %bb.4(0x7c000000)
+  ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vcc
+  ; GFX10-NEXT: {{ $}}
+  ; GFX10-NEXT: S_NOP 0
+  ; GFX10-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec
+  ; GFX10-NEXT: {{ $}}
+  ; GFX10-NEXT: bb.5:
+  ; GFX10-NEXT: successors: %bb.6(0x80000000)
+  ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vcc
+  ; GFX10-NEXT: {{ $}}
+  ; GFX10-NEXT: S_NOP 0
+  ; GFX10-NEXT: {{ $}}
+  ; GFX10-NEXT: bb.6:
+  ; GFX10-NEXT: successors: %bb.7(0x04000000), %bb.1(0x7c000000)
+  ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vcc
+  ; GFX10-NEXT: {{ $}}
+  ; GFX10-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
+  ; GFX10-NEXT: {{ $}}
+  ; GFX10-NEXT: bb.7:
+  ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vcc
+  ; GFX10-NEXT: {{ $}}
+  ; GFX10-NEXT: S_ENDPGM 0
+  bb.0.entry:
+    successors: %bb.1(0x80000000)
+
+    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vcc
+    %101:vgpr_32 = COPY $vgpr0
+    %102:vgpr_32 = COPY $vgpr1
+    %15:vreg_64 = COPY $vgpr2_vgpr3
+
+  bb.1:
+    successors: %bb.2(0x40000000), %bb.3(0x40000000)
+    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vcc
+    ; %6 and %7 (the two V_FMAC results) are defined here; their only use is the S_NOP in bb.3.
+    %20:sreg_32 = S_MOV_B32 0
+    %30:vreg_64 = COPY %20
+    %29:vgpr_32 = GLOBAL_LOAD_DWORD %30, 0, 0, implicit $exec :: (load (s32), addrspace 1)
+    %6:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %29, 0, %101, 0, %102, 0, 0, implicit $mode, implicit $exec
+    %31:vgpr_32 = GLOBAL_LOAD_DWORD %15, 0, 0, implicit $exec :: (load (s32), addrspace 1)
+    %7:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %31, 0, %101, 0, %102, 0, 0, implicit $mode, implicit $exec
+    %16:vgpr_32(s32) = COPY $vgpr0
+    %23:sreg_32 = S_MOV_B32 1
+    %24:sreg_32 = V_CMP_LT_I32_e64 %16(s32), %23, implicit $exec
+    %0:sreg_32 = COPY %24
+    %5:sreg_32 = SI_IF %0, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    successors: %bb.3(0x80000000)
+    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vcc
+
+    S_NOP 0
+
+  bb.3:
+    successors: %bb.4(0x40000000), %bb.6(0x40000000)
+    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vcc
+    ; The check lines above expect both V_FMAC_F32_e64 in this block, placed ahead of SI_END_CF.
+    SI_END_CF %5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_NOP 0, implicit %6, implicit %7
+    S_CBRANCH_EXECZ %bb.6, implicit $exec
+
+  bb.4:
+    successors: %bb.5(0x04000000), %bb.4(0x7c000000)
+    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vcc
+
+    S_NOP 0
+    S_CBRANCH_EXECZ %bb.4, implicit $exec
+
+  bb.5:
+    successors: %bb.6(0x80000000)
+    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vcc
+
+    S_NOP 0
+
+  bb.6:
+    successors: %bb.7(0x04000000), %bb.1(0x7c000000)
+    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vcc
+
+    S_CBRANCH_VCCZ %bb.1, implicit $vcc
+
+  bb.7:
+    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vcc
+    S_ENDPGM 0
+...