diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -582,7 +582,7 @@ if (!RemoveRedundantEndcf) return; - for (MachineInstr *MI : LoweredEndCf) { + for (MachineInstr *MI : reverse(LoweredEndCf)) { MachineBasicBlock &MBB = *MI->getParent(); auto Next = skipIgnoreExecInstsTrivialSucc(MBB, std::next(MI->getIterator())); diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir @@ -697,5 +697,200 @@ bb.6: S_BRANCH %bb.4 +... + +--- +# While collapsing inner endcf, certain blocks ended up getting two S_BRANCH instructions. +# It happens in the absence of BranchFolding (mostly at -O0) when the irregularly placed BBs are traversed +# in the forward direction and the intervening block between a predecessor and its successor gets optimized +# away in subsequent iterations, leaving 2 S_BRANCH instructions in the predecessor block. +# The issue was fixed by iterating the blocks from bottom-up to ensure all endcf pseudos at the bottom of the +# function are processed first. +# This test ensures there are no multiple S_BRANCH instructions inserted in any block. 
+ +name: no_multiple_unconditional_branches +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: no_multiple_unconditional_branches + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x40000000), %bb.14(0x40000000) + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 0, killed [[DEF]], implicit $exec + ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; GCN: S_CBRANCH_EXECZ %bb.14, implicit $exec + ; GCN: S_BRANCH %bb.1 + ; GCN: bb.1: + ; GCN: successors: %bb.2(0x40000000), %bb.14(0x40000000) + ; GCN: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 0, killed [[DEF1]], implicit $exec + ; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], killed [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] + ; GCN: S_CBRANCH_EXECZ %bb.14, implicit $exec + ; GCN: S_BRANCH %bb.2 + ; GCN: bb.2: + ; GCN: successors: %bb.3(0x40000000), %bb.7(0x40000000) + ; GCN: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[V_CMP_EQ_U32_e64_2:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 0, killed [[DEF2]], implicit $exec + ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; GCN: [[S_AND_B64_2:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], killed [[V_CMP_EQ_U32_e64_2]], implicit-def dead $scc + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_2]] + ; GCN: S_CBRANCH_EXECZ %bb.7, implicit $exec + ; GCN: S_BRANCH %bb.3 + ; GCN: bb.3: + ; GCN: successors: %bb.4(0x40000000), %bb.7(0x40000000) + ; GCN: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[V_CMP_EQ_U32_e64_3:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 0, killed [[DEF3]], implicit $exec + ; GCN: [[COPY3:%[0-9]+]]:sreg_64 = COPY 
$exec, implicit-def $exec + ; GCN: [[S_AND_B64_3:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY3]], killed [[V_CMP_EQ_U32_e64_3]], implicit-def dead $scc + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_3]] + ; GCN: S_CBRANCH_EXECZ %bb.7, implicit $exec + ; GCN: S_BRANCH %bb.4 + ; GCN: bb.4: + ; GCN: successors: %bb.7(0x80000000) + ; GCN: S_BRANCH %bb.7 + ; GCN: bb.7: + ; GCN: successors: %bb.8(0x80000000) + ; GCN: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc + ; GCN: S_BRANCH %bb.8 + ; GCN: bb.8: + ; GCN: successors: %bb.9(0x80000000) + ; GCN: S_BRANCH %bb.9 + ; GCN: bb.9: + ; GCN: successors: %bb.11(0x40000000), %bb.12(0x40000000) + ; GCN: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[V_CMP_EQ_U32_e64_4:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 0, killed [[DEF4]], implicit $exec + ; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; GCN: [[S_AND_B64_4:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], killed [[V_CMP_EQ_U32_e64_4]], implicit-def dead $scc + ; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[S_AND_B64_4]], [[COPY4]], implicit-def dead $scc + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_4]] + ; GCN: S_CBRANCH_EXECZ %bb.12, implicit $exec + ; GCN: S_BRANCH %bb.11 + ; GCN: bb.10: + ; GCN: successors: %bb.14(0x80000000) + ; GCN: S_BRANCH %bb.14 + ; GCN: bb.11: + ; GCN: successors: %bb.12(0x80000000) + ; GCN: S_BRANCH %bb.12 + ; GCN: bb.12: + ; GCN: successors: %bb.10(0x40000000), %bb.14(0x40000000) + ; GCN: [[S_OR_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_OR_SAVEEXEC_B64 [[S_XOR_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN: [[S_AND_B64_5:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[S_OR_SAVEEXEC_B64_]], implicit-def $scc + ; GCN: $exec = S_XOR_B64_term $exec, [[S_AND_B64_5]], implicit-def $scc + ; GCN: S_CBRANCH_EXECZ %bb.14, implicit $exec + ; GCN: S_BRANCH %bb.10 + ; GCN: bb.14: + ; GCN: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN: S_ENDPGM 0 + bb.0: + successors: %bb.1, %bb.14 + + %0:vgpr_32 = IMPLICIT_DEF + 
%1:sreg_64 = V_CMP_EQ_U32_e64 0, killed %0:vgpr_32, implicit $exec + %2:sreg_64 = SI_IF %1:sreg_64, %bb.14, implicit-def $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.1 + + bb.1: + ; predecessors: %bb.0 + successors: %bb.2, %bb.6 + + %3:vgpr_32 = IMPLICIT_DEF + %4:sreg_64 = V_CMP_EQ_U32_e64 0, killed %3:vgpr_32, implicit $exec + %5:sreg_64 = SI_IF killed %4:sreg_64, %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.2 + + bb.2: + ; predecessors: %bb.1 + successors: %bb.3, %bb.7 + + %6:vgpr_32 = IMPLICIT_DEF + %7:sreg_64 = V_CMP_EQ_U32_e64 0, killed %6:vgpr_32, implicit $exec + %8:sreg_64 = SI_IF killed %7:sreg_64, %bb.7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.3 + + bb.3: + ; predecessors: %bb.2 + successors: %bb.4, %bb.5 + + %9:vgpr_32 = IMPLICIT_DEF + %10:sreg_64 = V_CMP_EQ_U32_e64 0, killed %9:vgpr_32, implicit $exec + %11:sreg_64 = SI_IF killed %10:sreg_64, %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.4 + + bb.4: + ; predecessors: %bb.3 + successors: %bb.5 + + S_BRANCH %bb.5 + + bb.5: + ; predecessors: %bb.3, %bb.4 + successors: %bb.7 + + SI_END_CF %11:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.7 + + bb.6: + ; predecessors: %bb.1, %bb.13 + successors: %bb.14 + + SI_END_CF %5:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.14 + + bb.7: + ; predecessors: %bb.2, %bb.5 + successors: %bb.8 + + SI_END_CF %8:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.8 + + bb.8: + ; predecessors: %bb.7 + successors: %bb.9 + + S_BRANCH %bb.9 + + bb.9: + ; predecessors: %bb.8 + successors: %bb.11, %bb.12 + + %12:vgpr_32 = IMPLICIT_DEF + %13:sreg_64 = V_CMP_EQ_U32_e64 0, killed %12:vgpr_32, implicit $exec + %14:sreg_64 = SI_IF killed %13:sreg_64, %bb.12, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH 
%bb.11 + + bb.10: + ; predecessors: %bb.12 + successors: %bb.13 + + S_BRANCH %bb.13 + + bb.11: + ; predecessors: %bb.9 + successors: %bb.12 + + S_BRANCH %bb.12 + + bb.12: + ; predecessors: %bb.9, %bb.11 + successors: %bb.10, %bb.13 + + %15:sreg_64 = SI_ELSE %14:sreg_64, %bb.13, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.10 + + bb.13: + ; predecessors: %bb.10, %bb.12 + successors: %bb.6 + + SI_END_CF %15:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.6 + + bb.14: + ; predecessors: %bb.0, %bb.6 + + SI_END_CF %2:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_ENDPGM 0 ...