Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1659,7 +1659,11 @@ // register allocation. MI.setDesc(get(AMDGPU::S_XOR_B32)); break; - + case AMDGPU::S_OR_B64_term: + // This is only a terminator to get the correct spill code placement during + // register allocation. + MI.setDesc(get(AMDGPU::S_OR_B64)); + break; case AMDGPU::S_OR_B32_term: // This is only a terminator to get the correct spill code placement during // register allocation. @@ -2236,6 +2240,7 @@ case AMDGPU::SI_MASK_BRANCH: case AMDGPU::S_MOV_B64_term: case AMDGPU::S_XOR_B64_term: + case AMDGPU::S_OR_B64_term: case AMDGPU::S_ANDN2_B64_term: case AMDGPU::S_MOV_B32_term: case AMDGPU::S_XOR_B32_term: Index: llvm/lib/Target/AMDGPU/SIInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstructions.td +++ llvm/lib/Target/AMDGPU/SIInstructions.td @@ -264,6 +264,7 @@ let WaveSizePredicate = isWave64 in { def S_MOV_B64_term : WrapTerminatorInst<S_MOV_B64>; def S_XOR_B64_term : WrapTerminatorInst<S_XOR_B64>; +def S_OR_B64_term : WrapTerminatorInst<S_OR_B64>; def S_ANDN2_B64_term : WrapTerminatorInst<S_ANDN2_B64>; } Index: llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -99,6 +99,7 @@ unsigned MovTermOpc; unsigned Andn2TermOpc; unsigned XorTermrOpc; + unsigned OrTermrOpc; unsigned OrSaveExecOpc; unsigned Exec; @@ -106,7 +107,10 @@ void emitElse(MachineInstr &MI); void emitIfBreak(MachineInstr &MI); void emitLoop(MachineInstr &MI); - void emitEndCf(MachineInstr &MI); + + MachineBasicBlock *splitBlock(MachineInstr &MI, MachineBasicBlock *BB, + LiveIntervals *LIS); + MachineBasicBlock *emitEndCf(MachineInstr &MI); void findMaskOperands(MachineInstr &MI, unsigned OpNo, 
SmallVectorImpl<MachineOperand> &Src) const; @@ -115,7 +119,7 @@ bool removeMBBifRedundant(MachineBasicBlock &MBB); - void process(MachineInstr &MI); + MachineBasicBlock *process(MachineInstr &MI); // Skip to the next instruction, ignoring debug instructions, and trivial // block boundaries (blocks that have one (typically fallthrough) successor, @@ -489,19 +493,73 @@ } while (true); } -void SILowerControlFlow::emitEndCf(MachineInstr &MI) { +MachineBasicBlock *SILowerControlFlow::splitBlock(MachineInstr &MI, + MachineBasicBlock *BB, + LiveIntervals *LIS) { + MachineBasicBlock::iterator SplitPoint(&MI); + ++SplitPoint; + + if (SplitPoint == BB->end()) { + // Don't bother with a new block. + return BB; + } + + // Make sure we add any physregs we define in the block as liveins to the new + // block. + LivePhysRegs LiveRegs(*TRI); + LiveRegs.addLiveOuts(*BB); + for (auto I = BB->rbegin(), E = SplitPoint.getReverse(); I != E; ++I) + LiveRegs.stepBackward(*I); + + MachineFunction *MF = BB->getParent(); + MachineBasicBlock *SplitBB + = MF->CreateMachineBasicBlock(BB->getBasicBlock()); + + MF->insert(++MachineFunction::iterator(BB), SplitBB); + SplitBB->splice(SplitBB->begin(), BB, SplitPoint, BB->end()); + + SplitBB->transferSuccessorsAndUpdatePHIs(BB); + BB->addSuccessor(SplitBB); + + addLiveIns(*SplitBB, LiveRegs); + + if (LIS) + LIS->insertMBBInMaps(SplitBB, &MI); + + return SplitBB; +} + +MachineBasicBlock *SILowerControlFlow::emitEndCf(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); - MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); - Register CFMask = MI.getOperand(0).getReg(); - MachineInstr *Def = MRI.getUniqueVRegDef(CFMask); const DebugLoc &DL = MI.getDebugLoc(); - MachineBasicBlock::iterator InsPt = - Def && Def->getParent() == &MBB ? 
std::next(MachineBasicBlock::iterator(Def)) - : MBB.begin(); - MachineInstr *NewMI = BuildMI(MBB, InsPt, DL, TII->get(OrOpc), Exec) - .addReg(Exec) - .add(MI.getOperand(0)); + MachineBasicBlock::iterator InsPt = MBB.begin(); + + // If we have instructions that aren't prolog instructions, split the block + // and emit a terminator instruction. This ensures correct spill placement. + // FIXME: We should unconditionally split the block here. + bool NeedBlockSplit = false; + Register DataReg = MI.getOperand(0).getReg(); + for (MachineBasicBlock::iterator I = InsPt, E = MI.getIterator(); + I != E; ++I) { + if (I->modifiesRegister(DataReg, TRI)) { + NeedBlockSplit = true; + break; + } + } + + unsigned Opcode = OrOpc; + MachineBasicBlock *SplitBB = &MBB; + if (NeedBlockSplit) { + SplitBB = splitBlock(MI, &MBB, LIS); + Opcode = OrTermrOpc; + InsPt = MI; + } + + MachineInstr *NewMI = + BuildMI(MBB, InsPt, DL, TII->get(Opcode), Exec) + .addReg(Exec) + .add(MI.getOperand(0)); LoweredEndCf.insert(NewMI); @@ -522,6 +580,7 @@ if (LIS) LIS->handleMove(*NewMI); + return SplitBB; } // Returns replace operands for a logical operation, either single result @@ -608,11 +667,13 @@ } } -void SILowerControlFlow::process(MachineInstr &MI) { +MachineBasicBlock *SILowerControlFlow::process(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); MachineBasicBlock::iterator I(MI); MachineInstr *Prev = (I != MBB.begin()) ? 
&*(std::prev(I)) : nullptr; + + MachineBasicBlock *SplitBB = &MBB; + switch (MI.getOpcode()) { case AMDGPU::SI_IF: emitIf(MI); @@ -631,7 +692,7 @@ break; case AMDGPU::SI_END_CF: - emitEndCf(MI); + SplitBB = emitEndCf(MI); break; default: @@ -656,6 +717,8 @@ break; } } + + return SplitBB; } bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) { @@ -718,6 +781,7 @@ MovTermOpc = AMDGPU::S_MOV_B32_term; Andn2TermOpc = AMDGPU::S_ANDN2_B32_term; XorTermrOpc = AMDGPU::S_XOR_B32_term; + OrTermrOpc = AMDGPU::S_OR_B32_term; OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B32; Exec = AMDGPU::EXEC_LO; } else { @@ -727,6 +791,7 @@ MovTermOpc = AMDGPU::S_MOV_B64_term; Andn2TermOpc = AMDGPU::S_ANDN2_B64_term; XorTermrOpc = AMDGPU::S_XOR_B64_term; + OrTermrOpc = AMDGPU::S_OR_B64_term; OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B64; Exec = AMDGPU::EXEC; } @@ -734,19 +799,21 @@ SmallVector<MachineInstr *, 4> Worklist; MachineFunction::iterator NextBB; - for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); - BI != BE; BI = NextBB) { + for (MachineFunction::iterator BI = MF.begin(); + BI != MF.end(); BI = NextBB) { NextBB = std::next(BI); - MachineBasicBlock &MBB = *BI; + MachineBasicBlock *MBB = &*BI; - MachineBasicBlock::iterator I, Next; - for (I = MBB.begin(); I != MBB.end(); I = Next) { + MachineBasicBlock::iterator I, E, Next; + E = MBB->end(); + for (I = MBB->begin(); I != E; I = Next) { Next = std::next(I); MachineInstr &MI = *I; + MachineBasicBlock *SplitMBB = MBB; switch (MI.getOpcode()) { case AMDGPU::SI_IF: - process(MI); + SplitMBB = process(MI); break; case AMDGPU::SI_ELSE: @@ -757,12 +824,17 @@ if (InsertKillCleanups) Worklist.push_back(&MI); else - process(MI); + SplitMBB = process(MI); break; default: break; } + + if (SplitMBB != MBB) { + MBB = Next->getParent(); + E = MBB->end(); + } } } Index: llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp +++ 
llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp @@ -196,6 +196,12 @@ MI.setDesc(TII.get(AMDGPU::S_XOR_B32)); return true; } + case AMDGPU::S_OR_B64_term: { + // This is only a terminator to get the correct spill code placement during + // register allocation. + MI.setDesc(TII.get(AMDGPU::S_OR_B64)); + return true; + } case AMDGPU::S_OR_B32_term: { // This is only a terminator to get the correct spill code placement during // register allocation. Index: llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir +++ llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir @@ -205,9 +205,11 @@ ; CHECK: dead %7:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, 0, 0, implicit $exec :: (volatile load 4, addrspace 1) ; CHECK: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY [[COPY3]] ; CHECK: bb.2: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: successors: %bb.3(0x80000000) ; CHECK: [[COPY5:%[0-9]+]]:sreg_64_xexec = COPY [[COPY4]] - ; CHECK: $exec = S_OR_B64 $exec, killed [[COPY5]], implicit-def $scc + ; CHECK: $exec = S_OR_B64_term $exec, killed [[COPY5]], implicit-def $scc + ; CHECK: bb.3: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK: S_SLEEP 1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY6]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc Index: llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir +++ llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir @@ -1,18 +1,17 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=si-lower-control-flow -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN %s 
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=liveintervals,si-lower-control-flow,si-lower-control-flow -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN %s # Check that assert is not triggered -... --- name: si-lower-control-flow body: | bb.0: ; GCN-LABEL: name: si-lower-control-flow ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 16, 0 + ; GCN: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 16, 0, 0 ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 [[S_LOAD_DWORD_IMM]], 255, implicit-def $scc - ; GCN: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 65535, [[S_AND_B32_]], implicit-def $scc + ; GCN: dead %3:sreg_32_xm0 = S_AND_B32 65535, [[S_AND_B32_]], implicit-def $scc ; GCN: S_ENDPGM 0 %0:sgpr_64 = COPY $sgpr4_sgpr5 %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0, 0 @@ -51,3 +50,324 @@ S_ENDPGM 0 ... + +# We need to split the block for SI_END_CF, but SI_END_CF is the last instruction in the block, so no new block should be created. +--- +name: end_cf_split_block_end +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: end_cf_split_block_end + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; GCN: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; GCN: S_BRANCH %bb.2 + ; GCN: bb.1: + ; GCN: successors: 
%bb.2(0x80000000) + ; GCN: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]] + ; GCN: $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc + ; GCN: bb.2: + ; GCN: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + + %0:vgpr_32 = COPY killed $vgpr0 + %1:vgpr_32 = COPY killed $vgpr1 + %3:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %1, implicit $exec + %4:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec + %5:sreg_64_xexec = S_MOV_B64_term %4, implicit $exec + S_BRANCH %bb.2 + + bb.1: + successors: %bb.2 + + %6:sreg_64_xexec = COPY %5 + SI_END_CF killed %6, implicit-def $exec, implicit-def dead $scc, implicit $exec + + bb.2: + S_ENDPGM 0 + +... + +--- +name: end_cf_split_block_physreg_livein +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: end_cf_split_block_physreg_livein + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31, $sgpr4_sgpr5 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; GCN: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; GCN: S_BRANCH %bb.2 + ; GCN: bb.1: + ; GCN: successors: %bb.3(0x80000000) + ; GCN: liveins: $vgpr0, $sgpr4_sgpr5 + ; GCN: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]] + ; GCN: S_NOP 0 + ; GCN: $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc + ; GCN: bb.3: + ; GCN: successors: 
%bb.2(0x80000000) + ; GCN: liveins: $vgpr0, $sgpr4_sgpr5 + ; GCN: S_SLEEP 3 + ; GCN: S_NOP 0, implicit $vgpr0, implicit $sgpr4_sgpr5 + ; GCN: bb.2: + ; GCN: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31, $sgpr4_sgpr5 + + %0:vgpr_32 = COPY killed $vgpr0 + %1:vgpr_32 = COPY killed $vgpr1 + %3:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %1, implicit $exec + %4:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec + %5:sreg_64_xexec = S_MOV_B64_term %4, implicit $exec + S_BRANCH %bb.2 + + bb.1: + successors: %bb.2 + liveins: $vgpr0, $sgpr4_sgpr5 + + %6:sreg_64_xexec = COPY %5 + S_NOP 0 + SI_END_CF killed %6, implicit-def $exec, implicit-def dead $scc, implicit $exec + S_SLEEP 3 + S_NOP 0, implicit $vgpr0, implicit $sgpr4_sgpr5 + + bb.2: + S_ENDPGM 0 + +... + +--- +name: end_cf_split_block_physreg_livein_liveout +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: end_cf_split_block_physreg_livein_liveout + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11:0x0000000000000003 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; GCN: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; GCN: S_BRANCH %bb.2 + ; GCN: bb.1: + ; GCN: successors: %bb.3(0x80000000) + ; GCN: liveins: $vgpr0, $sgpr4_sgpr5, 
$sgpr8_sgpr9_sgpr10_sgpr11:0x0000000000000003 + ; GCN: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]] + ; GCN: $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc + ; GCN: bb.3: + ; GCN: successors: %bb.2(0x80000000) + ; GCN: liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9 + ; GCN: S_SLEEP 3 + ; GCN: S_NOP 0 + ; GCN: bb.2: + ; GCN: liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11:0x0000000000000003 + ; GCN: S_ENDPGM 0, implicit $vgpr0, implicit $sgpr4_sgpr5, implicit $sgpr8_sgpr9_sgpr10_sgpr11 + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11:0x00000003 + + %0:vgpr_32 = COPY killed $vgpr0 + %1:vgpr_32 = COPY killed $vgpr1 + %3:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %1, implicit $exec + %4:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec + %5:sreg_64_xexec = S_MOV_B64_term %4, implicit $exec + S_BRANCH %bb.2 + + bb.1: + successors: %bb.2 + liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11:0x00000003 + + %6:sreg_64_xexec = COPY %5 + SI_END_CF killed %6, implicit-def $exec, implicit-def dead $scc, implicit $exec + S_SLEEP 3 + S_NOP 0 + + bb.2: + liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11:0x00000003 + S_ENDPGM 0, implicit $vgpr0, implicit $sgpr4_sgpr5, implicit $sgpr8_sgpr9_sgpr10_sgpr11 + +... 
+ +--- +name: end_cf_split_block_physreg_liveout +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: end_cf_split_block_physreg_liveout + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; GCN: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; GCN: S_BRANCH %bb.2 + ; GCN: bb.1: + ; GCN: successors: %bb.3(0x80000000) + ; GCN: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]] + ; GCN: $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc + ; GCN: bb.3: + ; GCN: successors: %bb.2(0x80000000) + ; GCN: liveins: $vgpr3 + ; GCN: $vgpr3 = V_MOV_B32_e32 0, implicit $exec + ; GCN: $sgpr4_sgpr5 = S_MOV_B64 32 + ; GCN: bb.2: + ; GCN: liveins: $vgpr3, $sgpr4_sgpr5 + ; GCN: S_ENDPGM 0, implicit $vgpr3, implicit $sgpr4_sgpr5 + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + + %0:vgpr_32 = COPY killed $vgpr0 + %1:vgpr_32 = COPY killed $vgpr1 + %3:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %1, implicit $exec + %4:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec + %5:sreg_64_xexec = S_MOV_B64_term %4, implicit $exec + S_BRANCH %bb.2 + + bb.1: + successors: %bb.2 + + %6:sreg_64_xexec = COPY %5 + SI_END_CF killed %6, implicit-def $exec, implicit-def dead $scc, implicit $exec + $vgpr3 = V_MOV_B32_e32 0, implicit $exec + 
$sgpr4_sgpr5 = S_MOV_B64 32 + + bb.2: + liveins: $vgpr3, $sgpr4_sgpr5 + S_ENDPGM 0, implicit $vgpr3, implicit $sgpr4_sgpr5 + +... + +--- +name: end_cf_split_block_physreg_live_across_split +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: end_cf_split_block_physreg_live_across_split + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31, $sgpr4_sgpr5 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; GCN: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; GCN: S_BRANCH %bb.2 + ; GCN: bb.1: + ; GCN: successors: %bb.3(0x80000000) + ; GCN: liveins: $vgpr0, $sgpr4_sgpr5 + ; GCN: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]] + ; GCN: $sgpr4_sgpr5 = S_MOV_B64 32 + ; GCN: $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc + ; GCN: bb.3: + ; GCN: successors: %bb.2(0x80000000) + ; GCN: liveins: $vgpr0, $sgpr4_sgpr5 + ; GCN: S_SLEEP 3, implicit $sgpr4_sgpr5 + ; GCN: S_NOP 0 + ; GCN: bb.2: + ; GCN: liveins: $vgpr0, $sgpr4_sgpr5 + ; GCN: S_ENDPGM 0, implicit $vgpr0, implicit $sgpr4_sgpr5 + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31, $sgpr4_sgpr5 + + %0:vgpr_32 = COPY killed $vgpr0 + %1:vgpr_32 = COPY killed $vgpr1 + %3:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %1, implicit $exec + %4:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec + %5:sreg_64_xexec = 
S_MOV_B64_term %4, implicit $exec + S_BRANCH %bb.2 + + bb.1: + successors: %bb.2 + liveins: $vgpr0, $sgpr4_sgpr5 + + %6:sreg_64_xexec = COPY %5 + $sgpr4_sgpr5 = S_MOV_B64 32 + SI_END_CF killed %6, implicit-def $exec, implicit-def dead $scc, implicit $exec + S_SLEEP 3, implicit $sgpr4_sgpr5 + S_NOP 0 + + bb.2: + liveins: $vgpr0, $sgpr4_sgpr5 + S_ENDPGM 0, implicit $vgpr0, implicit $sgpr4_sgpr5 + +... + +--- +name: end_cf_split_block_process_next_inst +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: end_cf_split_block_process_next_inst + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY2]], implicit $exec + ; GCN: dead %5:sreg_64_xexec = S_MOV_B64 0 + ; GCN: bb.1: + ; GCN: successors: %bb.3(0x80000000) + ; GCN: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[V_CMP_EQ_U32_e64_]] + ; GCN: $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc + ; GCN: bb.3: + ; GCN: successors: %bb.2(0x80000000) + ; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc + ; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY4]], implicit-def dead $scc + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; GCN: dead %8:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; GCN: bb.2: + ; GCN: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + %0:vgpr_32 = COPY killed $vgpr0 + %1:vgpr_32 = COPY killed $vgpr1 + %2:vgpr_32 = COPY killed $vgpr2 + %3:sreg_64_xexec = V_CMP_EQ_U32_e64 %0, killed %1, 
implicit $exec + %4:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %2, implicit $exec + %5:sreg_64_xexec = S_MOV_B64 0 + + bb.1: + successors: %bb.2 + + %6:sreg_64_xexec = COPY %3 + SI_END_CF killed %6, implicit-def $exec, implicit-def dead $scc, implicit $exec + %7:sreg_64_xexec = SI_IF %4, %bb.2, implicit-def $exec, implicit-def dead $scc, implicit $exec + %8:sreg_64_xexec = S_MOV_B64_term %7, implicit $exec + + bb.2: + S_ENDPGM 0 + +...