diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -274,24 +274,9 @@ ++I; } } else { - // We don't want SI_MASK_BRANCH/SI_RETURN_TO_EPILOG encoded. They are + // We don't want these pseudo instructions encoded. They are // placeholder terminator instructions and should only be printed as // comments. - if (MI->getOpcode() == AMDGPU::SI_MASK_BRANCH) { - if (isVerbose()) { - SmallVector BBStr; - raw_svector_ostream Str(BBStr); - - const MachineBasicBlock *MBB = MI->getOperand(0).getMBB(); - const MCSymbolRefExpr *Expr - = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext); - Expr->print(Str, MAI); - OutStreamer->emitRawComment(Twine(" mask branch ") + BBStr); - } - - return; - } - if (MI->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) { if (isVerbose()) OutStreamer->emitRawComment(" return to shader part epilog"); diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp --- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp @@ -7,9 +7,7 @@ //===----------------------------------------------------------------------===// // /// \file -/// This pass inserts branches on the 0 exec mask over divergent branches -/// branches when it's expected that jumping over the untaken control flow will -/// be cheaper than having every workitem no-op through it. +/// This pass mainly lowers early terminate pseudo instructions. // //===----------------------------------------------------------------------===// @@ -24,32 +22,21 @@ #define DEBUG_TYPE "si-insert-skips" -static cl::opt SkipThresholdFlag( - "amdgpu-skip-threshold-legacy", - cl::desc("Number of instructions before jumping over divergent control flow"), - cl::init(12), cl::Hidden); - namespace { class SIInsertSkips : public MachineFunctionPass { private: const SIRegisterInfo *TRI = nullptr; const SIInstrInfo *TII = nullptr; - unsigned SkipThreshold = 0; MachineDominatorTree *MDT = nullptr; MachineBasicBlock *EarlyExitBlock = nullptr; bool EarlyExitClearsExec = false; - bool shouldSkip(const MachineBasicBlock &From, - const MachineBasicBlock &To) const; - void ensureEarlyExitBlock(MachineBasicBlock &MBB, bool ClearExec); void earlyTerm(MachineInstr &MI); - bool skipMaskBranch(MachineInstr &MI, MachineBasicBlock &MBB); - public: static char ID; @@ -87,53 +74,6 @@ if (MI.isMetaInstruction()) return true; - // Handle target specific opcodes. - switch (MI.getOpcode()) { - case AMDGPU::SI_MASK_BRANCH: - return true; - default: - return false; - } -} - -bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From, - const MachineBasicBlock &To) const { - unsigned NumInstr = 0; - const MachineFunction *MF = From.getParent(); - - for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end(); - MBBI != End && MBBI != ToI; ++MBBI) { - const MachineBasicBlock &MBB = *MBBI; - - for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end(); - NumInstr < SkipThreshold && I != E; ++I) { - if (opcodeEmitsNoInsts(*I)) - continue; - - // FIXME: Since this is required for correctness, this should be inserted - // during SILowerControlFlow. - - // When a uniform loop is inside non-uniform control flow, the branch - // leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken - // when EXEC = 0. We should skip the loop lest it becomes infinite. - if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ || - I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ) - return true; - - if (TII->hasUnwantedEffectsWhenEXECEmpty(*I)) - return true; - - // These instructions are potentially expensive even if EXEC = 0. - if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) || - I->getOpcode() == AMDGPU::S_WAITCNT) - return true; - - ++NumInstr; - if (NumInstr >= SkipThreshold) - return true; - } - } - return false; } @@ -209,29 +149,11 @@ MDT->getBase().insertEdge(&MBB, EarlyExitBlock); } -// Returns true if a branch over the block was inserted. -bool SIInsertSkips::skipMaskBranch(MachineInstr &MI, - MachineBasicBlock &SrcMBB) { - MachineBasicBlock *DestBB = MI.getOperand(0).getMBB(); - - if (!shouldSkip(**SrcMBB.succ_begin(), *DestBB)) - return false; - - const DebugLoc &DL = MI.getDebugLoc(); - MachineBasicBlock::iterator InsPt = std::next(MI.getIterator()); - - BuildMI(SrcMBB, InsPt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ)) - .addMBB(DestBB); - - return true; -} - bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) { const GCNSubtarget &ST = MF.getSubtarget(); TII = ST.getInstrInfo(); TRI = &TII->getRegisterInfo(); MDT = &getAnalysis(); - SkipThreshold = SkipThresholdFlag; MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; ExecReg = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; @@ -246,10 +168,6 @@ MachineInstr &MI = *I; switch (MI.getOpcode()) { - case AMDGPU::SI_MASK_BRANCH: - MadeChange |= skipMaskBranch(MI, MBB); - break; - case AMDGPU::S_BRANCH: // Optimize out branches to the next block. // FIXME: Shouldn't this be handled by BranchFolding? diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2356,10 +2356,8 @@ // Skip over the instructions that are artificially terminators for special // exec management. - while (I != E && !I->isBranch() && !I->isReturn() && - I->getOpcode() != AMDGPU::SI_MASK_BRANCH) { + while (I != E && !I->isBranch() && !I->isReturn()) { switch (I->getOpcode()) { - case AMDGPU::SI_MASK_BRANCH: case AMDGPU::S_MOV_B64_term: case AMDGPU::S_XOR_B64_term: case AMDGPU::S_OR_B64_term: @@ -2387,34 +2385,7 @@ if (I == E) return false; - if (I->getOpcode() != AMDGPU::SI_MASK_BRANCH) - return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify); - - ++I; - - // TODO: Should be able to treat as fallthrough? - if (I == MBB.end()) - return true; - - if (analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify)) - return true; - - MachineBasicBlock *MaskBrDest = I->getOperand(0).getMBB(); - - // Specifically handle the case where the conditional branch is to the same - // destination as the mask branch. e.g. - // - // si_mask_branch BB8 - // s_cbranch_execz BB8 - // s_cbranch BB9 - // - // This is required to understand divergent loops which may need the branches - // to be relaxed. - if (TBB != MaskBrDest || Cond.empty()) - return true; - - auto Pred = Cond[0].getImm(); - return (Pred != EXECZ && Pred != EXECNZ); + return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify); } unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB, @@ -2425,11 +2396,6 @@ unsigned RemovedSize = 0; while (I != MBB.end()) { MachineBasicBlock::iterator Next = std::next(I); - if (I->getOpcode() == AMDGPU::SI_MASK_BRANCH) { - I = Next; - continue; - } - RemovedSize += getInstSizeInBytes(*I); I->eraseFromParent(); ++Count; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -259,19 +259,6 @@ // SI pseudo instructions. These are used by the CFG structurizer pass // and should be lowered to ISA instructions prior to codegen. -// Dummy terminator instruction to use after control flow instructions -// replaced with exec mask operations. -def SI_MASK_BRANCH : VPseudoInstSI < - (outs), (ins brtarget:$target)> { - let isBranch = 0; - let isTerminator = 1; - let isBarrier = 0; - let SchedRW = []; - let hasNoSchedulingInfo = 1; - let FixedSize = 1; - let Size = 0; -} - let isTerminator = 1 in { let OtherPredicates = [EnableLateCFGStructurize] in { diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir b/llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir --- a/llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir +++ b/llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir @@ -39,7 +39,7 @@ %20:sreg_64 = COPY $exec, implicit-def $exec %21:sreg_64 = S_AND_B64 %20, %19, implicit-def dead $scc $exec = S_MOV_B64_term %21 - SI_MASK_BRANCH %bb.4, implicit $exec + S_CBRANCH_EXECZ %bb.4, implicit $exec S_BRANCH %bb.3 bb.3: @@ -73,7 +73,7 @@ %29:sreg_64 = COPY $exec, implicit-def $exec %30:sreg_64 = S_AND_B64 %29, %26, implicit-def dead $scc $exec = S_MOV_B64_term %30 - SI_MASK_BRANCH %bb.10, implicit $exec + S_CBRANCH_EXECZ %bb.10, implicit $exec S_BRANCH %bb.8 bb.8: diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir b/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir --- a/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir +++ b/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir @@ -90,7 +90,7 @@ %44:sreg_64 = S_AND_B64 %43, %30, implicit-def dead $scc %45:sreg_64 = S_XOR_B64 %44, %43, implicit-def dead $scc $exec = S_MOV_B64_term killed %44 - SI_MASK_BRANCH %bb.9, implicit $exec + S_CBRANCH_EXECZ %bb.9, implicit $exec S_BRANCH %bb.8 bb.5: @@ -122,7 +122,7 @@ %67:sreg_64 = COPY $exec, implicit-def $exec %68:sreg_64 = S_AND_B64 %67, %61, implicit-def dead $scc $exec = S_MOV_B64_term killed %68 - SI_MASK_BRANCH %bb.13, implicit $exec + S_CBRANCH_EXECZ %bb.13, implicit $exec S_BRANCH %bb.7 bb.7: @@ -198,7 +198,7 @@ %90:sreg_64 = S_AND_B64 %89, %87, implicit-def dead $scc %46:sreg_64 = S_XOR_B64 %90, %89, implicit-def dead $scc $exec = S_MOV_B64_term killed %90 - SI_MASK_BRANCH %bb.5, implicit $exec + S_CBRANCH_EXECZ %bb.5, implicit $exec S_BRANCH %bb.15 bb.13: @@ -211,7 +211,7 @@ %95:sreg_64 = COPY $exec, implicit-def $exec %96:sreg_64 = S_AND_B64 %95, %93, implicit-def dead $scc $exec = S_MOV_B64_term killed %96 - SI_MASK_BRANCH %bb.16, implicit $exec + S_CBRANCH_EXECZ %bb.16, implicit $exec S_BRANCH %bb.14 bb.14: diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir b/llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir --- a/llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir +++ b/llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir @@ -83,7 +83,7 @@ %23:sreg_64 = S_AND_B64 %22, %18, implicit-def dead $scc %24:sreg_64 = S_XOR_B64 %23, %22, implicit-def dead $scc $exec = S_MOV_B64_term killed %23 - SI_MASK_BRANCH %bb.7, implicit $exec + S_CBRANCH_EXECZ %bb.7, implicit $exec S_BRANCH %bb.18 bb.7: diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir b/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir --- a/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir +++ b/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir @@ -74,7 +74,7 @@ %23:sreg_64 = COPY $exec, implicit-def $exec %24:sreg_64 = S_AND_B64 %23, %22, implicit-def dead $scc $exec = S_MOV_B64_term killed %24 - SI_MASK_BRANCH %bb.7, implicit $exec + S_CBRANCH_EXECZ %bb.7, implicit $exec S_BRANCH %bb.5 bb.5: @@ -153,7 +153,7 @@ %50:sreg_64 = COPY $exec, implicit-def $exec %51:sreg_64 = S_AND_B64 %50, %49, implicit-def dead $scc $exec = S_MOV_B64_term killed %51 - SI_MASK_BRANCH %bb.16, implicit $exec + S_CBRANCH_EXECZ %bb.16, implicit $exec S_BRANCH %bb.15 bb.15: diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf2.mir b/llvm/test/CodeGen/AMDGPU/collapse-endcf2.mir --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf2.mir +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf2.mir @@ -30,7 +30,7 @@ ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_LT_U32_e64_]], implicit-def dead $scc ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_]] - ; GCN: SI_MASK_BRANCH %bb.4, implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN: S_BRANCH %bb.1 ; GCN: bb.1: ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) @@ -47,7 +47,7 @@ ; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_1]] - ; GCN: SI_MASK_BRANCH %bb.3, implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec ; GCN: S_BRANCH %bb.2 ; GCN: bb.2: ; GCN: successors: %bb.3(0x80000000) @@ -77,7 +77,7 @@ %3:sreg_64 = COPY $exec, implicit-def $exec %4:sreg_64 = S_AND_B64 %3, %2, implicit-def dead $scc $exec = S_MOV_B64_term %4 - SI_MASK_BRANCH %bb.4, implicit $exec + S_CBRANCH_EXECZ %bb.4, implicit $exec S_BRANCH %bb.1 bb.1: @@ -96,7 +96,7 @@ %12:sreg_64 = COPY $exec, implicit-def $exec %13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc $exec = S_MOV_B64_term %13 - SI_MASK_BRANCH %bb.3, implicit $exec + S_CBRANCH_EXECZ %bb.3, implicit $exec S_BRANCH %bb.2 bb.2: diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem.mir deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem.mir +++ /dev/null @@ -1,58 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=polaris10 -run-pass si-insert-skips -amdgpu-skip-threshold-legacy=1 -verify-machineinstrs %s -o - | FileCheck %s - ---- - -name: skip_execz_flat -body: | - ; CHECK-LABEL: name: skip_execz_flat - ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec - ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; CHECK: FLAT_STORE_DWORD undef $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr - ; CHECK: bb.2: - ; CHECK: S_ENDPGM 0 - bb.0: - successors: %bb.1, %bb.2 - SI_MASK_BRANCH %bb.2, implicit $exec - - bb.1: - successors: %bb.2 - $vgpr0 = V_MOV_B32_e32 0, implicit $exec - FLAT_STORE_DWORD undef $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr - - bb.2: - S_ENDPGM 0 -... - ---- - -name: skip_execz_mubuf -body: | - ; CHECK-LABEL: name: skip_execz_mubuf - ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec - ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec - ; CHECK: bb.2: - ; CHECK: S_ENDPGM 0 - bb.0: - successors: %bb.1, %bb.2 - SI_MASK_BRANCH %bb.2, implicit $exec - - bb.1: - successors: %bb.2 - $vgpr0 = V_MOV_B32_e32 0, implicit $exec - BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec - - bb.2: - S_ENDPGM 0 -... diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-gws.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-gws.mir deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-skips-gws.mir +++ /dev/null @@ -1,59 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-insert-skips -amdgpu-skip-threshold-legacy=1 -verify-machineinstrs %s -o - | FileCheck %s -# Make sure mandatory skips are inserted to ensure GWS ops aren't run with exec = 0 - ---- - -name: skip_gws_init -body: | - ; CHECK-LABEL: name: skip_gws_init - ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec - ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; CHECK: DS_GWS_INIT $vgpr0, 0, implicit $m0, implicit $exec - ; CHECK: bb.2: - ; CHECK: S_ENDPGM 0 - bb.0: - successors: %bb.1, %bb.2 - SI_MASK_BRANCH %bb.2, implicit $exec - - bb.1: - successors: %bb.2 - $vgpr0 = V_MOV_B32_e32 0, implicit $exec - DS_GWS_INIT $vgpr0, 0, implicit $m0, implicit $exec - - bb.2: - S_ENDPGM 0 -... - ---- - -name: skip_gws_barrier -body: | - ; CHECK-LABEL: name: skip_gws_barrier - ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec - ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; CHECK: DS_GWS_BARRIER $vgpr0, 0, implicit $m0, implicit $exec - ; CHECK: bb.2: - ; CHECK: S_ENDPGM 0 - bb.0: - successors: %bb.1, %bb.2 - SI_MASK_BRANCH %bb.2, implicit $exec - - bb.1: - successors: %bb.2 - $vgpr0 = V_MOV_B32_e32 0, implicit $exec - DS_GWS_BARRIER $vgpr0, 0, implicit $m0, implicit $exec - - bb.2: - S_ENDPGM 0 -... diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-ignored-insts.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-ignored-insts.mir deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-skips-ignored-insts.mir +++ /dev/null @@ -1,54 +0,0 @@ -# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass si-insert-skips -amdgpu-skip-threshold-legacy=2 %s -o - | FileCheck %s - ---- - -# CHECK-LABEL: name: no_count_mask_branch_pseudo -# CHECK: $vgpr1 = V_MOV_B32_e32 7, implicit $exec -# CHECK-NEXT: SI_MASK_BRANCH -# CHECK-NOT: S_CBRANCH_EXECZ -name: no_count_mask_branch_pseudo -body: | - bb.0: - successors: %bb.1 - - $vgpr1 = V_MOV_B32_e32 7, implicit $exec - SI_MASK_BRANCH %bb.2, implicit $exec - - bb.1: - successors: %bb.2 - $vgpr0 = V_MOV_B32_e32 0, implicit $exec - SI_MASK_BRANCH %bb.3, implicit $exec - - bb.2: - $vgpr0 = V_MOV_B32_e32 1, implicit $exec - - bb.3: - S_ENDPGM 0 -... - ---- - -# CHECK-LABEL: name: no_count_dbg_value -# CHECK: $vgpr1 = V_MOV_B32_e32 7, implicit $exec -# CHECK-NEXT: SI_MASK_BRANCH -# CHECK-NOT: S_CBRANCH_EXECZ -name: no_count_dbg_value -body: | - bb.0: - successors: %bb.1 - - $vgpr1 = V_MOV_B32_e32 7, implicit $exec - SI_MASK_BRANCH %bb.2, implicit $exec - - bb.1: - successors: %bb.2 - $vgpr0 = V_MOV_B32_e32 0, implicit $exec - DBG_VALUE - - bb.2: - $vgpr0 = V_MOV_B32_e32 1, implicit $exec - - bb.3: - S_ENDPGM 0 -... - diff --git a/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir b/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir --- a/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir +++ b/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir @@ -9,25 +9,16 @@ body: | ; GCN-LABEL: name: loop_header_nopred ; GCN: bb.0: - ; GCN: successors: %bb.1(0x80000000) - ; GCN: S_INST_PREFETCH 1 - ; GCN: S_BRANCH %bb.1 - ; GCN: bb.6 (align 64): - ; GCN: successors: %bb.7(0x04000000), %bb.1(0x7c000000) - ; GCN: S_CBRANCH_VCCNZ %bb.7, implicit $vcc - ; GCN: bb.1: - ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; GCN: S_CBRANCH_VCCNZ %bb.2, implicit $vcc - ; GCN: bb.3: - ; GCN: successors: %bb.4(0x40000000), %bb.6(0x40000000) - ; GCN: SI_MASK_BRANCH %bb.6, implicit $exec - ; GCN: S_BRANCH %bb.4 - ; GCN: bb.2 (align 64): - ; GCN: successors: %bb.4(0x40000000), %bb.6(0x40000000) - ; GCN: SI_MASK_BRANCH %bb.6, implicit $exec - ; GCN: S_BRANCH %bb.4 - ; GCN: bb.4: - ; GCN: successors: %bb.5(0x04000000), %bb.4(0x7c000000) + ; GCN: successors: %bb.2(0x80000000) + ; GCN: S_BRANCH %bb.2 + ; GCN: bb.1 (align 64): + ; GCN: successors: %bb.7(0x04000000), %bb.2(0x7c000000) + ; GCN: S_CBRANCH_VCCNZ %bb.7, implicit $vcc_lo + ; GCN: bb.2: + ; GCN: successors: %bb.5(0x40000000), %bb.1(0x40000000) + ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; GCN: bb.5: + ; GCN: successors: %bb.1(0x04000000), %bb.5(0x7c000000) ; GCN: S_NOP 0 ; GCN: S_NOP 0 ; GCN: S_NOP 0 @@ -39,10 +30,8 @@ ; GCN: S_NOP 0 ; GCN: S_NOP 0 ; GCN: S_NOP 0 - ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec - ; GCN: bb.5: - ; GCN: successors: %bb.6(0x80000000) - ; GCN: S_BRANCH %bb.6 + ; GCN: S_CBRANCH_EXECZ %bb.5, implicit $exec + ; GCN: S_BRANCH %bb.1 ; GCN: bb.7: ; GCN: S_ENDPGM 0 bb.0: @@ -60,7 +49,7 @@ bb.3: successors: %bb.4(0x40000000), %bb.6(0x40000000) - SI_MASK_BRANCH %bb.6, implicit $exec + S_CBRANCH_EXECZ %bb.6, implicit $exec S_BRANCH %bb.4 bb.4: diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir @@ -86,7 +86,7 @@ V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec $sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 killed $vcc, implicit-def $exec, implicit-def $scc, implicit $exec $sgpr2_sgpr3 = S_XOR_B64 $exec, killed $sgpr2_sgpr3, implicit-def dead $scc - SI_MASK_BRANCH %bb.2.exit, implicit $exec + S_CBRANCH_EXECZ %bb.2.exit, implicit $exec bb.1.atomic: successors: %bb.2.exit(0x80000000) diff --git a/llvm/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir b/llvm/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir --- a/llvm/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir +++ b/llvm/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir @@ -22,7 +22,7 @@ ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc ; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[S_AND_B64_]], [[COPY1]], implicit-def dead $scc ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_]] - ; GCN: SI_MASK_BRANCH %bb.2, implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.2, implicit $exec ; GCN: S_BRANCH %bb.1 ; GCN: bb.1: ; GCN: successors: %bb.2(0x80000000) @@ -32,7 +32,7 @@ ; GCN: $exec = S_AND_B64 $exec, [[COPY]], implicit-def dead $scc ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[S_OR_SAVEEXEC_B64_]], implicit-def $scc ; GCN: $exec = S_XOR_B64_term $exec, [[S_AND_B64_1]], implicit-def $scc - ; GCN: SI_MASK_BRANCH %bb.6, implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.6, implicit $exec ; GCN: S_BRANCH %bb.3 ; GCN: bb.3: ; GCN: successors: %bb.4(0x40000000), %bb.5(0x40000000) @@ -40,7 +40,7 @@ ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_2:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_2]] - ; GCN: SI_MASK_BRANCH %bb.5, implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.5, implicit $exec ; GCN: S_BRANCH %bb.4 ; GCN: bb.4: ; GCN: successors: %bb.5(0x80000000) @@ -60,7 +60,7 @@ %4:sreg_64 = S_AND_B64 %3, %2, implicit-def dead $scc %5:sreg_64 = S_XOR_B64 %4, %3, implicit-def dead $scc $exec = S_MOV_B64_term %4 - SI_MASK_BRANCH %bb.2, implicit $exec + S_CBRANCH_EXECZ %bb.2, implicit $exec S_BRANCH %bb.1 bb.1: @@ -72,7 +72,7 @@ $exec = S_AND_B64 $exec, %0, implicit-def dead $scc %7:sreg_64 = S_AND_B64 $exec, %6, implicit-def $scc $exec = S_XOR_B64_term $exec, %7, implicit-def $scc - SI_MASK_BRANCH %bb.6, implicit $exec + S_CBRANCH_EXECZ %bb.6, implicit $exec S_BRANCH %bb.3 bb.3: @@ -82,7 +82,7 @@ %9:sreg_64 = COPY $exec, implicit-def $exec %10:sreg_64 = S_AND_B64 %9, %8, implicit-def dead $scc $exec = S_MOV_B64_term %10 - SI_MASK_BRANCH %bb.5, implicit $exec + S_CBRANCH_EXECZ %bb.5, implicit $exec S_BRANCH %bb.4 bb.4: diff --git a/llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir b/llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir --- a/llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir +++ b/llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir @@ -128,7 +128,7 @@ # CHECK-LABEL: name: optimize_if_and_saveexec_xor{{$}} # CHECK: $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec # CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc -# CHECK-NEXT: SI_MASK_BRANCH +# CHECK-NEXT: S_CBRANCH_EXECZ name: optimize_if_and_saveexec_xor liveins: @@ -143,7 +143,7 @@ $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc $exec = S_MOV_B64_term killed $sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2, implicit $exec + S_CBRANCH_EXECZ %bb.2, implicit $exec S_BRANCH %bb.1 bb.1.if: @@ -166,7 +166,7 @@ --- # CHECK-LABEL: name: optimize_if_and_saveexec{{$}} # CHECK: $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec -# CHECK-NEXT: SI_MASK_BRANCH +# CHECK-NEXT: S_CBRANCH_EXECZ name: optimize_if_and_saveexec liveins: @@ -180,7 +180,7 @@ $vgpr0 = V_MOV_B32_e32 4, implicit $exec $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc $exec = S_MOV_B64_term killed $sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2, implicit $exec + S_CBRANCH_EXECZ %bb.2, implicit $exec S_BRANCH %bb.1 bb.1.if: @@ -203,7 +203,7 @@ --- # CHECK-LABEL: name: optimize_if_or_saveexec{{$}} # CHECK: $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec -# CHECK-NEXT: SI_MASK_BRANCH +# CHECK-NEXT: S_CBRANCH_EXECZ name: optimize_if_or_saveexec liveins: @@ -217,7 +217,7 @@ $vgpr0 = V_MOV_B32_e32 4, implicit $exec $sgpr2_sgpr3 = S_OR_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc $exec = S_MOV_B64_term killed $sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2, implicit $exec + S_CBRANCH_EXECZ %bb.2, implicit $exec S_BRANCH %bb.1 bb.1.if: @@ -243,7 +243,7 @@ # CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec # CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc # CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3 -# CHECK-NEXT: SI_MASK_BRANCH +# CHECK-NEXT: S_CBRANCH_EXECZ name: optimize_if_and_saveexec_xor_valu_middle liveins: - { reg: '$vgpr0' } @@ -258,7 +258,7 @@ BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc $exec = S_MOV_B64_term killed $sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2, implicit $exec + S_CBRANCH_EXECZ %bb.2, implicit $exec S_BRANCH %bb.1 bb.1.if: @@ -283,7 +283,7 @@ # CHECK: $sgpr0_sgpr1 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc # CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 undef $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc # CHECK-NEXT: $exec = COPY $sgpr0_sgpr1 -# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec +# CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec name: optimize_if_and_saveexec_xor_wrong_reg liveins: - { reg: '$vgpr0' } @@ -299,7 +299,7 @@ $sgpr0_sgpr1 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc $sgpr0_sgpr1 = S_XOR_B64 undef $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc $exec = S_MOV_B64_term $sgpr0_sgpr1 - SI_MASK_BRANCH %bb.2, implicit $exec + S_CBRANCH_EXECZ %bb.2, implicit $exec S_BRANCH %bb.1 bb.1.if: @@ -322,7 +322,7 @@ # CHECK-NEXT: $sgpr2_sgpr3 = S_OR_B64 killed $sgpr2_sgpr3, 1, implicit-def $scc # CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc # CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3 -# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec +# CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec name: optimize_if_and_saveexec_xor_modify_copy_to_exec liveins: @@ -338,7 +338,7 @@ $sgpr2_sgpr3 = S_OR_B64 killed $sgpr2_sgpr3, 1, implicit-def $scc $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc $exec = S_MOV_B64_term killed $sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2, implicit $exec + S_CBRANCH_EXECZ %bb.2, implicit $exec S_BRANCH %bb.1 bb.1.if: @@ -365,7 +365,7 @@ # CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc # CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc # CHECK-NEXT: $exec = COPY $sgpr2_sgpr3 -# CHECK-NEXT: SI_MASK_BRANCH +# CHECK-NEXT: S_CBRANCH_EXECZ name: optimize_if_and_saveexec_xor_live_out_setexec liveins: - { reg: '$vgpr0' } @@ -379,7 +379,7 @@ $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc $exec = S_MOV_B64_term $sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2, implicit $exec + S_CBRANCH_EXECZ %bb.2, implicit $exec S_BRANCH %bb.1 bb.1.if: @@ -404,7 +404,7 @@ # CHECK: $sgpr0_sgpr1 = COPY $exec # CHECK: $sgpr2_sgpr3 = S_LSHR_B64 $sgpr0_sgpr1, killed $vcc_lo, implicit-def $scc # CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3 -# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec +# CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec name: optimize_if_unknown_saveexec liveins: @@ -418,7 +418,7 @@ $vgpr0 = V_MOV_B32_e32 4, implicit $exec $sgpr2_sgpr3 = S_LSHR_B64 $sgpr0_sgpr1, killed $vcc_lo, implicit-def $scc $exec = S_MOV_B64_term killed $sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2, implicit $exec + S_CBRANCH_EXECZ %bb.2, implicit $exec S_BRANCH %bb.1 bb.1.if: @@ -441,7 +441,7 @@ --- # CHECK-LABEL: name: optimize_if_andn2_saveexec{{$}} # CHECK: $sgpr0_sgpr1 = S_ANDN2_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec -# CHECK-NEXT: SI_MASK_BRANCH +# CHECK-NEXT: S_CBRANCH_EXECZ name: optimize_if_andn2_saveexec liveins: @@ -455,7 +455,7 @@ $vgpr0 = V_MOV_B32_e32 4, implicit $exec $sgpr2_sgpr3 = S_ANDN2_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc $exec = S_MOV_B64_term killed $sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2, implicit $exec + S_CBRANCH_EXECZ %bb.2, implicit $exec S_BRANCH %bb.1 bb.1.if: @@ -479,7 +479,7 @@ # CHECK-LABEL: name: optimize_if_andn2_saveexec_no_commute{{$}} # CHECK: $sgpr2_sgpr3 = S_ANDN2_B64 killed $vcc, $sgpr0_sgpr1, implicit-def $scc # CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3 -# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec +# CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec name: optimize_if_andn2_saveexec_no_commute liveins: - { reg: '$vgpr0' } @@ -492,7 +492,7 @@ $vgpr0 = V_MOV_B32_e32 4, implicit $exec $sgpr2_sgpr3 = S_ANDN2_B64 killed $vcc, $sgpr0_sgpr1, implicit-def $scc $exec = S_MOV_B64_term killed $sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2, implicit $exec + S_CBRANCH_EXECZ %bb.2, implicit $exec S_BRANCH %bb.1 bb.1.if: @@ -531,7 +531,7 @@ $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc $exec = S_MOV_B64_term killed $sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2, implicit $exec + S_CBRANCH_EXECZ %bb.2, implicit $exec S_BRANCH %bb.1 bb.1.if: diff --git a/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir b/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir --- a/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir +++ b/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir @@ -15,7 +15,7 @@ $vgpr4 = V_AND_B32_e32 1, $vgpr1, implicit $exec V_CMP_EQ_U32_e32 1, killed $vgpr4, implicit-def $vcc, implicit $exec $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed $vcc, implicit-def $exec, implicit-def $scc, implicit $exec - SI_MASK_BRANCH %bb.2, implicit $exec + S_CBRANCH_EXECZ %bb.2, implicit $exec S_BRANCH %bb.1 bb.1: diff --git a/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir b/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir +++ /dev/null @@ -1,194 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-insert-skips -amdgpu-skip-threshold-legacy=1000000 -o - %s | FileCheck %s - ---- -name: skip_branch_taildup_endpgm -machineFunctionInfo: - isEntryFunction: true -body: | - ; CHECK-LABEL: name: skip_branch_taildup_endpgm - ; CHECK: bb.0: - ; CHECK: successors: %bb.3(0x40000000), %bb.1(0x40000000) - ; CHECK: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 4, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4) - ; CHECK: renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec - ; CHECK: S_WAITCNT 127 - ; CHECK: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $exec - ; CHECK: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr0, killed $vgpr0, implicit-def $vcc, implicit $exec - ; CHECK: renamable $vgpr1 = V_ADDC_U32_e32 0, killed $vgpr1, implicit-def $vcc, implicit killed $vcc, implicit $exec - ; CHECK: renamable $vgpr0 = FLAT_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1) - ; CHECK: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4) - ; CHECK: S_WAITCNT 112 - ; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec - ; CHECK: $sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: renamable $sgpr2_sgpr3 = S_XOR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def dead $scc - ; CHECK: SI_MASK_BRANCH %bb.1, implicit $exec - ; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec - ; CHECK: S_BRANCH %bb.3 - ; CHECK: bb.1: - ; CHECK: successors: %bb.4(0x40000000), %bb.2(0x40000000) - ; CHECK: renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc - ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec - ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec - ; CHECK: S_BRANCH %bb.4 - ; CHECK: bb.2: - ; CHECK: $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc - ; CHECK: renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec - ; CHECK: S_ENDPGM 0 - ; CHECK: bb.3: - ; CHECK: successors: %bb.4(0x40000000), %bb.2(0x40000000) - ; CHECK: renamable $vgpr2 = V_MOV_B32_e32 15, implicit $exec - ; CHECK: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 - ; CHECK: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - ; CHECK: renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc - ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec - ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec - ; CHECK: bb.4: - ; CHECK: renamable $vgpr2 = V_MOV_B32_e32 8, implicit $exec - ; CHECK: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 - ; CHECK: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1, implicit $exec - ; CHECK: $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc - ; CHECK: renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec - ; CHECK: S_ENDPGM 0 - bb.0: - successors: %bb.1, %bb.2 - liveins: $vgpr0, $sgpr4_sgpr5, $sgpr7 - - renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 4, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4) - renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec - S_WAITCNT 127 - $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $exec - renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr0, killed $vgpr0, implicit-def $vcc, implicit $exec - renamable $vgpr1 = V_ADDC_U32_e32 0, killed $vgpr1, implicit-def $vcc, implicit killed $vcc, implicit $exec - renamable $vgpr0 = FLAT_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1) - renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4) - S_WAITCNT 112 - V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec - $sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec - renamable $sgpr2_sgpr3 = S_XOR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def dead $scc - SI_MASK_BRANCH %bb.2, implicit $exec - S_BRANCH %bb.1 - - bb.2: - successors: %bb.3, %bb.4 - liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 - - renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec - $exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc - SI_MASK_BRANCH %bb.4, implicit $exec - S_BRANCH %bb.3 - - bb.4: - liveins: $sgpr2_sgpr3 - - $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc - renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec - S_ENDPGM 0 - - bb.1: - successors: %bb.3, %bb.4 - liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 - - renamable $vgpr2 = V_MOV_B32_e32 15, implicit $exec - $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 - $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec - $exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc - SI_MASK_BRANCH %bb.4, implicit $exec - S_BRANCH %bb.3 - - bb.3: - liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 - - renamable $vgpr2 = V_MOV_B32_e32 8, implicit $exec - $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 - $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1, implicit $exec - $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc - renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec - S_ENDPGM 0 - -... - ---- -name: skip_branch_taildup_ret -body: | - ; CHECK-LABEL: name: skip_branch_taildup_ret - ; CHECK: bb.0: - ; CHECK: successors: %bb.3(0x40000000), %bb.1(0x40000000) - ; CHECK: S_WAITCNT 0 - ; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec - ; CHECK: $sgpr6_sgpr7 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: renamable $sgpr6_sgpr7 = S_XOR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def dead $scc - ; CHECK: SI_MASK_BRANCH %bb.1, implicit $exec - ; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec - ; CHECK: S_BRANCH %bb.3 - ; CHECK: bb.1: - ; CHECK: successors: %bb.4(0x40000000), %bb.2(0x40000000) - ; CHECK: renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc - ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec - ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec - ; CHECK: S_BRANCH %bb.4 - ; CHECK: bb.2: - ; CHECK: $exec = S_OR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc - ; CHECK: renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec - ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31 - ; CHECK: bb.3: - ; CHECK: successors: %bb.4(0x40000000), %bb.2(0x40000000) - ; CHECK: renamable $vgpr0 = V_MOV_B32_e32 15, implicit $exec - ; CHECK: renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc - ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec - ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec - ; CHECK: bb.4: - ; CHECK: renamable $vgpr0 = V_MOV_B32_e32 8, implicit $exec - ; CHECK: $exec = S_OR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc - ; CHECK: renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec - ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31 - bb.0: - successors: %bb.1, %bb.2 - liveins: $vgpr0, $sgpr30_sgpr31, $vgpr1_vgpr2 - - S_WAITCNT 0 - V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec - $sgpr6_sgpr7 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec - renamable $sgpr6_sgpr7 = S_XOR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def dead $scc - SI_MASK_BRANCH %bb.2, implicit $exec - S_BRANCH %bb.1 - - bb.2: - successors: %bb.3, %bb.4 - liveins: $sgpr6_sgpr7, $sgpr30_sgpr31, $vgpr1_vgpr2 - - renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec - $exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc - SI_MASK_BRANCH %bb.4, implicit $exec - S_BRANCH %bb.3 - - bb.4: - liveins: $sgpr6_sgpr7, $sgpr30_sgpr31 - - $exec = S_OR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc - renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec - S_SETPC_B64_return $sgpr30_sgpr31 - - bb.1: - successors: %bb.3, %bb.4 - liveins: $sgpr6_sgpr7, $sgpr30_sgpr31, $vgpr1_vgpr2 - - renamable $vgpr0 = V_MOV_B32_e32 15, implicit $exec - renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec - $exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc - SI_MASK_BRANCH %bb.4, implicit $exec - S_BRANCH %bb.3 - - bb.3: - liveins: $sgpr6_sgpr7, $sgpr30_sgpr31, $vgpr1_vgpr2 - - renamable $vgpr0 = V_MOV_B32_e32 8, implicit $exec - $exec = S_OR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc - renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec - S_SETPC_B64_return $sgpr30_sgpr31 - -... diff --git a/llvm/test/CodeGen/AMDGPU/spill-before-exec.mir b/llvm/test/CodeGen/AMDGPU/spill-before-exec.mir --- a/llvm/test/CodeGen/AMDGPU/spill-before-exec.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-before-exec.mir @@ -49,7 +49,7 @@ %0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr96_sgpr97, implicit-def $exec, implicit-def $scc, implicit $exec $exec = S_XOR_B64_term $exec, %0, implicit-def $scc - SI_MASK_BRANCH %bb.3, implicit $exec + S_CBRANCH_EXECZ %bb.3, implicit $exec S_BRANCH %bb.2 bb.2: @@ -57,7 +57,7 @@ %0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr98_sgpr99, implicit-def $exec, implicit-def $scc, implicit $exec $exec = S_XOR_B64_term $exec, %0, implicit-def $scc - SI_MASK_BRANCH %bb.3, implicit $exec + S_CBRANCH_EXECZ %bb.3, implicit $exec S_BRANCH %bb.4 bb.3: diff --git a/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir b/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir --- a/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir +++ b/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir @@ -203,7 +203,7 @@ %47:sreg_64 = COPY $exec, implicit-def $exec %48:sreg_64 = S_AND_B64 %47, %46, implicit-def dead $scc $exec = S_MOV_B64_term %48 - SI_MASK_BRANCH %bb.18, implicit $exec + S_CBRANCH_EXECZ %bb.18, implicit $exec S_BRANCH %bb.16 bb.16: diff --git a/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir --- a/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir @@ -52,7 +52,7 @@ SI_SPILL_S64_SAVE $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store 8 into %stack.0, align 4, addrspace 5) $sgpr2_sgpr3 = S_AND_B64 killed $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc $exec = S_MOV_B64_term killed $sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2, implicit $exec + S_CBRANCH_EXECZ %bb.2, implicit $exec S_BRANCH %bb.1 bb.1: @@ -117,7 +117,7 @@ $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store 8 into %stack.0, align 4, addrspace 5) $exec = S_MOV_B64_term killed $sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2, implicit $exec + S_CBRANCH_EXECZ %bb.2, implicit $exec S_BRANCH %bb.1 bb.1: