Index: llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -122,6 +122,19 @@ skipIgnoreExecInstsTrivialSucc(MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const; + /// Find the insertion point for a new conditional branch. + MachineBasicBlock::iterator + skipToUncondBrOrEnd(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + assert(I->isTerminator()); + + // FIXME: What if we had multiple pre-existing conditional branches? + MachineBasicBlock::iterator End = MBB.end(); + while (I != End && !I->isUnconditionalBranch()) + ++I; + return I; + } + // Remove redundant SI_END_CF instructions. void optimizeEndCf(); @@ -275,6 +288,10 @@ BuildMI(MBB, I, DL, TII->get(MovTermOpc), Exec) .addReg(Tmp, RegState::Kill); + // Skip ahead to the unconditional branch in case there are other terminators + // present. + I = skipToUncondBrOrEnd(MBB, I); + // Insert the S_CBRANCH_EXECZ instruction which will be optimized later // during SIRemoveShortExecBranches. MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ)) @@ -353,6 +370,10 @@ .addReg(Exec) .addReg(DstReg); + // Skip ahead to the unconditional branch in case there are other terminators + // present. + ElsePt = skipToUncondBrOrEnd(MBB, ElsePt); + MachineInstr *Branch = BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ)) .addMBB(DestBB); @@ -435,8 +456,9 @@ .addReg(Exec) .add(MI.getOperand(0)); + auto BranchPt = skipToUncondBrOrEnd(MBB, MI.getIterator()); MachineInstr *Branch = - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) + BuildMI(MBB, BranchPt, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) .add(MI.getOperand(1)); if (LIS) { Index: llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir @@ -0,0 +1,173 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass=si-lower-control-flow -o - %s | FileCheck %s + +# Test si-lower-control-flow insertion points when other terminator +# instructions are present besides the control flow pseudo and a +# branch. + + +# There's another terminator instruction between SI_IF and +# S_BRANCH. The S_CBRANCH_EXECZ should be inserted immediately before +# S_BRANCH. +--- +name: other_terminator_sbranch_after_si_if +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: other_terminator_sbranch_after_si_if + ; CHECK: bb.0: + ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr4_sgpr5 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 + ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 0, [[COPY]], implicit $exec + ; CHECK: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc + ; CHECK: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[COPY1]], implicit $exec + ; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; CHECK: S_BRANCH %bb.2 + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: S_BRANCH %bb.2 + ; CHECK: bb.2: + ; CHECK: S_ENDPGM 0, implicit [[S_MOV_B64_term]] + bb.0: + successors: %bb.2, %bb.1 + liveins: $vgpr0, $vgpr1, $sgpr4_sgpr5 + + %0:vgpr_32 = COPY killed $vgpr0 + %1:sreg_64_xexec = COPY $sgpr4_sgpr5 + %2:sreg_64_xexec = V_CMP_EQ_U32_e64 0, %0, implicit $exec + %3:sreg_64_xexec = SI_IF %2, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec + %4:sreg_64_xexec = S_MOV_B64_term killed %1, implicit $exec + S_BRANCH %bb.2 + + bb.1: + S_BRANCH %bb.2 + + bb.2: + S_ENDPGM 0, implicit %4 + +... + +# S_CBRANCH_EXECZ should be inserted after the other terminator +--- +name: other_terminator_fallthrough_after_si_if +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: other_terminator_fallthrough_after_si_if + ; CHECK: bb.0: + ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr4_sgpr5 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 + ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 0, [[COPY]], implicit $exec + ; CHECK: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc + ; CHECK: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[COPY1]], implicit $exec + ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: S_BRANCH %bb.2 + ; CHECK: bb.2: + ; CHECK: S_ENDPGM 0, implicit [[S_MOV_B64_term]] + bb.0: + successors: %bb.2, %bb.1 + liveins: $vgpr0, $vgpr1, $sgpr4_sgpr5 + + %0:vgpr_32 = COPY killed $vgpr0 + %1:sreg_64_xexec = COPY $sgpr4_sgpr5 + %2:sreg_64_xexec = V_CMP_EQ_U32_e64 0, %0, implicit $exec + %3:sreg_64_xexec = SI_IF %2, %bb.2, implicit-def $exec, implicit-def dead $scc, implicit $exec + %4:sreg_64_xexec = S_MOV_B64_term killed %1, implicit $exec + + bb.1: + S_BRANCH %bb.2 + + bb.2: + S_ENDPGM 0, implicit %4 + +... + +--- +name: other_terminator_sbranch_after_si_else +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: other_terminator_sbranch_after_si_else + ; CHECK: bb.0: + ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr4_sgpr5 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY %2 + ; CHECK: [[S_OR_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_SAVEEXEC_B64 [[COPY]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 + ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 0, [[COPY1]], implicit $exec + ; CHECK: $exec = S_XOR_B64_term $exec, [[S_OR_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[COPY2]], implicit $exec + ; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; CHECK: S_BRANCH %bb.2 + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: S_BRANCH %bb.2 + ; CHECK: bb.2: + ; CHECK: S_ENDPGM 0, implicit [[S_MOV_B64_term]] + bb.0: + successors: %bb.2, %bb.1 + liveins: $vgpr0, $vgpr1, $sgpr4_sgpr5 + + %0:vgpr_32 = COPY killed $vgpr0 + %1:sreg_64_xexec = COPY $sgpr4_sgpr5 + %2:sreg_64_xexec = V_CMP_EQ_U32_e64 0, %0, implicit $exec + %3:sreg_64_xexec = SI_ELSE %2, %bb.1, 0, implicit-def $exec, implicit-def dead $scc, implicit $exec + %4:sreg_64_xexec = S_MOV_B64_term killed %1, implicit $exec + S_BRANCH %bb.2 + + bb.1: + S_BRANCH %bb.2 + + bb.2: + S_ENDPGM 0, implicit %4 + +... + +--- +name: other_terminator_sbranch_after_si_loop +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: other_terminator_sbranch_after_si_loop + ; CHECK: bb.0: + ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr4_sgpr5 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 + ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 0, [[COPY]], implicit $exec + ; CHECK: $exec = S_ANDN2_B64_term $exec, [[V_CMP_EQ_U32_e64_]], implicit-def $scc + ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[COPY1]], implicit $exec + ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec + ; CHECK: S_BRANCH %bb.2 + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: S_BRANCH %bb.2 + ; CHECK: bb.2: + ; CHECK: S_ENDPGM 0, implicit [[S_MOV_B64_term]] + bb.0: + successors: %bb.2, %bb.1 + liveins: $vgpr0, $vgpr1, $sgpr4_sgpr5 + + %0:vgpr_32 = COPY killed $vgpr0 + %1:sreg_64_xexec = COPY $sgpr4_sgpr5 + %2:sreg_64_xexec = V_CMP_EQ_U32_e64 0, %0, implicit $exec + SI_LOOP %2, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec + %4:sreg_64_xexec = S_MOV_B64_term killed %1, implicit $exec + S_BRANCH %bb.2 + + bb.1: + S_BRANCH %bb.2 + + bb.2: + S_ENDPGM 0, implicit %4 + +... Index: llvm/test/CodeGen/AMDGPU/si-if-lower-user-terminators.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/si-if-lower-user-terminators.mir +++ llvm/test/CodeGen/AMDGPU/si-if-lower-user-terminators.mir @@ -25,9 +25,9 @@ ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc ; CHECK: $exec = S_MOV_B64_term killed [[S_AND_B64_]] - ; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec ; CHECK: [[S_MOV_B64_term1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec + ; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec ; CHECK: S_BRANCH %bb.2 ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) @@ -43,9 +43,9 @@ ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY6]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK: [[S_XOR_B64_1:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_1]], [[COPY6]], implicit-def dead $scc ; CHECK: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] - ; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec ; CHECK: [[S_MOV_B64_term1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_1]], implicit $exec ; CHECK: [[S_MOV_B64_term2:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_1]], implicit $exec + ; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec ; CHECK: S_BRANCH %bb.2 bb.0: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31