Index: llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -122,6 +122,19 @@ skipIgnoreExecInstsTrivialSucc(MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const; + /// Find the insertion point for a new conditional branch, starting from terminator \p I: the first unconditional branch at or after \p I, or MBB.end() if there is none. + MachineBasicBlock::iterator + skipToUncondBrOrEnd(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + assert(I->isTerminator()); + + // FIXME: What if we had multiple pre-existing conditional branches? + MachineBasicBlock::iterator End = MBB.end(); + while (I != End && !I->isUnconditionalBranch()) + ++I; + return I; + } + // Remove redundant SI_END_CF instructions. void optimizeEndCf(); @@ -275,6 +288,10 @@ BuildMI(MBB, I, DL, TII->get(MovTermOpc), Exec) .addReg(Tmp, RegState::Kill); + // Skip ahead to the unconditional branch in case there are other terminators + // present. + I = skipToUncondBrOrEnd(MBB, I); + // Insert the S_CBRANCH_EXECZ instruction which will be optimized later // during SIRemoveShortExecBranches. MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ)) @@ -353,6 +370,10 @@ .addReg(Exec) .addReg(DstReg); + // Skip ahead to the unconditional branch in case there are other terminators + // present. 
+ ElsePt = skipToUncondBrOrEnd(MBB, ElsePt); + MachineInstr *Branch = BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ)) .addMBB(DestBB); @@ -435,8 +456,9 @@ .addReg(Exec) .add(MI.getOperand(0)); + auto BranchPt = skipToUncondBrOrEnd(MBB, MI.getIterator()); MachineInstr *Branch = - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) + BuildMI(MBB, BranchPt, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) .add(MI.getOperand(1)); if (LIS) { Index: llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir @@ -0,0 +1,246 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass=si-lower-control-flow -o - %s | FileCheck %s + +# Test si-lower-control-flow insertion points when other terminator +# instructions are present besides the control flow pseudo and a +# branch. + + +# There's another terminator instruction between SI_IF and +# S_BRANCH. The S_CBRANCH_EXECZ should be inserted immediately before +# S_BRANCH. 
+--- +name: other_terminator_sbranch_after_si_if +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: other_terminator_sbranch_after_si_if + ; CHECK: bb.0: + ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr4_sgpr5 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 + ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 0, [[COPY]], implicit $exec + ; CHECK: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc + ; CHECK: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[COPY1]], implicit $exec + ; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; CHECK: S_BRANCH %bb.2 + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: S_BRANCH %bb.2 + ; CHECK: bb.2: + ; CHECK: S_ENDPGM 0, implicit [[S_MOV_B64_term]] + bb.0: + successors: %bb.2, %bb.1 + liveins: $vgpr0, $vgpr1, $sgpr4_sgpr5 + + %0:vgpr_32 = COPY killed $vgpr0 + %1:sreg_64_xexec = COPY $sgpr4_sgpr5 + %2:sreg_64_xexec = V_CMP_EQ_U32_e64 0, %0, implicit $exec + %3:sreg_64_xexec = SI_IF %2, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec + %4:sreg_64_xexec = S_MOV_B64_term killed %1, implicit $exec + S_BRANCH %bb.2 + + bb.1: + S_BRANCH %bb.2 + + bb.2: + S_ENDPGM 0, implicit %4 + +... 
+ +# S_CBRANCH_EXECZ should be inserted after the other terminator +--- +name: other_terminator_fallthrough_after_si_if +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: other_terminator_fallthrough_after_si_if + ; CHECK: bb.0: + ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr4_sgpr5 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 + ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 0, [[COPY]], implicit $exec + ; CHECK: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc + ; CHECK: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[COPY1]], implicit $exec + ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: S_BRANCH %bb.2 + ; CHECK: bb.2: + ; CHECK: S_ENDPGM 0, implicit [[S_MOV_B64_term]] + bb.0: + successors: %bb.2, %bb.1 + liveins: $vgpr0, $vgpr1, $sgpr4_sgpr5 + + %0:vgpr_32 = COPY killed $vgpr0 + %1:sreg_64_xexec = COPY $sgpr4_sgpr5 + %2:sreg_64_xexec = V_CMP_EQ_U32_e64 0, %0, implicit $exec + %3:sreg_64_xexec = SI_IF %2, %bb.2, implicit-def $exec, implicit-def dead $scc, implicit $exec + %4:sreg_64_xexec = S_MOV_B64_term killed %1, implicit $exec + + bb.1: + S_BRANCH %bb.2 + + bb.2: + S_ENDPGM 0, implicit %4 + +... 
+ +--- +name: other_terminator_sbranch_after_si_else +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: other_terminator_sbranch_after_si_else + ; CHECK: bb.0: + ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr4_sgpr5 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY %2 + ; CHECK: [[S_OR_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_SAVEEXEC_B64 [[COPY]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 + ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 0, [[COPY1]], implicit $exec + ; CHECK: $exec = S_XOR_B64_term $exec, [[S_OR_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[COPY2]], implicit $exec + ; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; CHECK: S_BRANCH %bb.2 + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: S_BRANCH %bb.2 + ; CHECK: bb.2: + ; CHECK: S_ENDPGM 0, implicit [[S_MOV_B64_term]] + bb.0: + successors: %bb.2, %bb.1 + liveins: $vgpr0, $vgpr1, $sgpr4_sgpr5 + + %0:vgpr_32 = COPY killed $vgpr0 + %1:sreg_64_xexec = COPY $sgpr4_sgpr5 + %2:sreg_64_xexec = V_CMP_EQ_U32_e64 0, %0, implicit $exec + %3:sreg_64_xexec = SI_ELSE %2, %bb.1, 0, implicit-def $exec, implicit-def dead $scc, implicit $exec + %4:sreg_64_xexec = S_MOV_B64_term killed %1, implicit $exec + S_BRANCH %bb.2 + + bb.1: + S_BRANCH %bb.2 + + bb.2: + S_ENDPGM 0, implicit %4 + +... 
+ +--- +name: other_terminator_sbranch_after_si_loop +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: other_terminator_sbranch_after_si_loop + ; CHECK: bb.0: + ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr4_sgpr5 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 + ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 0, [[COPY]], implicit $exec + ; CHECK: $exec = S_ANDN2_B64_term $exec, [[V_CMP_EQ_U32_e64_]], implicit-def $scc + ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[COPY1]], implicit $exec + ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec + ; CHECK: S_BRANCH %bb.2 + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: S_BRANCH %bb.2 + ; CHECK: bb.2: + ; CHECK: S_ENDPGM 0, implicit [[S_MOV_B64_term]] + bb.0: + successors: %bb.2, %bb.1 + liveins: $vgpr0, $vgpr1, $sgpr4_sgpr5 + + %0:vgpr_32 = COPY killed $vgpr0 + %1:sreg_64_xexec = COPY $sgpr4_sgpr5 + %2:sreg_64_xexec = V_CMP_EQ_U32_e64 0, %0, implicit $exec + SI_LOOP %2, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec + %4:sreg_64_xexec = S_MOV_B64_term killed %1, implicit $exec + S_BRANCH %bb.2 + + bb.1: + S_BRANCH %bb.2 + + bb.2: + S_ENDPGM 0, implicit %4 + +... + +# The save exec result register of SI_IF is used by other terminators +# inserted to behave as a lowered phi. The output register of SI_IF +# was ignored, and the def was removed, so the S_MOV_B64_term uses +# would fail the verifier. 
+ +--- +name: si_if_use +alignment: 1 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: si_if_use + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1 + ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 killed [[COPY]], killed [[COPY1]], implicit $exec + ; CHECK: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc + ; CHECK: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec + ; CHECK: [[S_MOV_B64_term1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec + ; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; CHECK: S_BRANCH %bb.2 + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term1]] + ; CHECK: dead %7:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, 0, 0, implicit $exec :: (volatile load 4, addrspace 1) + ; CHECK: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY [[COPY3]] + ; CHECK: bb.2: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: [[COPY5:%[0-9]+]]:sreg_64_xexec = COPY [[COPY4]] + ; CHECK: $exec = S_OR_B64 $exec, killed [[COPY5]], implicit-def $scc + ; CHECK: S_SLEEP 1 + ; CHECK: [[COPY6:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY6]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK: [[S_XOR_B64_1:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_1]], [[COPY6]], implicit-def dead $scc + ; CHECK: 
$exec = S_MOV_B64_term killed [[S_AND_B64_1]] + ; CHECK: [[S_MOV_B64_term1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_1]], implicit $exec + ; CHECK: [[S_MOV_B64_term2:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_1]], implicit $exec + ; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; CHECK: S_BRANCH %bb.2 + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + + %0:vgpr_32 = COPY killed $vgpr0 + %1:vgpr_32 = COPY killed $vgpr1 + %3:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %1, implicit $exec + %10:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec + %14:sreg_64_xexec = S_MOV_B64_term %10, implicit $exec + %13:sreg_64_xexec = S_MOV_B64_term %10, implicit $exec + S_BRANCH %bb.2 + + bb.1: + %11:sreg_64_xexec = COPY %13 + dead %6:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, 0, 0, implicit $exec :: (volatile load 4, addrspace 1) + %14:sreg_64_xexec = COPY %11 + + bb.2: + %12:sreg_64_xexec = COPY %14 + SI_END_CF killed %12, implicit-def $exec, implicit-def dead $scc, implicit $exec + S_SLEEP 1 + %9:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec + %14:sreg_64_xexec = S_MOV_B64_term %9, implicit $exec + %13:sreg_64_xexec = S_MOV_B64_term %9, implicit $exec + S_BRANCH %bb.2 + +... Index: llvm/test/CodeGen/AMDGPU/si-if-lower-user-terminators.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/si-if-lower-user-terminators.mir +++ /dev/null @@ -1,75 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=si-lower-control-flow -verify-machineinstrs -o - %s | FileCheck %s - -# The save exec result register of SI_IF is used by other terminators -# inserted to behave as a lowered phi. The output register of SI_IF -# was ignored, and the def was removed, so the S_MOV_B64_term uses -# would fail the verifier. 
- ---- -name: si_if_use -alignment: 1 -legalized: true -regBankSelected: true -selected: true -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: si_if_use - ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1 - ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 killed [[COPY]], killed [[COPY1]], implicit $exec - ; CHECK: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; CHECK: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc - ; CHECK: $exec = S_MOV_B64_term killed [[S_AND_B64_]] - ; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec - ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec - ; CHECK: [[S_MOV_B64_term1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec - ; CHECK: S_BRANCH %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term1]] - ; CHECK: dead %7:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, 0, 0, implicit $exec :: (volatile load 4, addrspace 1) - ; CHECK: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY [[COPY3]] - ; CHECK: bb.2: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: [[COPY5:%[0-9]+]]:sreg_64_xexec = COPY [[COPY4]] - ; CHECK: $exec = S_OR_B64 $exec, killed [[COPY5]], implicit-def $scc - ; CHECK: S_SLEEP 1 - ; CHECK: [[COPY6:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY6]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; CHECK: [[S_XOR_B64_1:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_1]], [[COPY6]], implicit-def dead $scc - ; CHECK: 
$exec = S_MOV_B64_term killed [[S_AND_B64_1]] - ; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec - ; CHECK: [[S_MOV_B64_term1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_1]], implicit $exec - ; CHECK: [[S_MOV_B64_term2:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_1]], implicit $exec - ; CHECK: S_BRANCH %bb.2 - bb.0: - liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - - %0:vgpr_32 = COPY killed $vgpr0 - %1:vgpr_32 = COPY killed $vgpr1 - %3:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %1, implicit $exec - %10:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec - %14:sreg_64_xexec = S_MOV_B64_term %10, implicit $exec - %13:sreg_64_xexec = S_MOV_B64_term %10, implicit $exec - S_BRANCH %bb.2 - - bb.1: - %11:sreg_64_xexec = COPY %13 - dead %6:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, 0, 0, implicit $exec :: (volatile load 4, addrspace 1) - %14:sreg_64_xexec = COPY %11 - - bb.2: - %12:sreg_64_xexec = COPY %14 - SI_END_CF killed %12, implicit-def $exec, implicit-def dead $scc, implicit $exec - S_SLEEP 1 - %9:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec - %14:sreg_64_xexec = S_MOV_B64_term %9, implicit $exec - %13:sreg_64_xexec = S_MOV_B64_term %9, implicit $exec - S_BRANCH %bb.2 - -...