Index: llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -51,6 +51,7 @@ #include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/LiveIntervals.h" @@ -81,6 +82,7 @@ const SIInstrInfo *TII = nullptr; LiveIntervals *LIS = nullptr; MachineRegisterInfo *MRI = nullptr; + DenseSet<const MachineInstr *> LoweredEndCf; const TargetRegisterClass *BoolRC = nullptr; unsigned AndOpc; @@ -103,6 +105,13 @@ void combineMasks(MachineInstr &MI); + // Skip to the next instruction, ignoring debug instructions, and trivial + // block boundaries (blocks that have one (typically fallthrough) successor, + // and the successor has one predecessor). + MachineBasicBlock::iterator + skipIgnoreExecInstsTrivialSucc(MachineBasicBlock &MBB, + MachineBasicBlock::iterator It) const; + public: static char ID; @@ -396,6 +405,36 @@ MI.eraseFromParent(); } +MachineBasicBlock::iterator +SILowerControlFlow::skipIgnoreExecInstsTrivialSucc( + MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const { + + SmallSet<const MachineBasicBlock *, 4> Visited; + MachineBasicBlock *B = &MBB; + do { + if (!Visited.insert(B).second) + return MBB.end(); + + auto E = B->end(); + for ( ; It != E; ++It) { + if (TII->mayReadEXEC(*MRI, *It)) + break; + } + + if (It != E) + return It; + + if (B->succ_size() != 1) + return MBB.end(); + + // If there is one trivial successor, advance to the next block. 
+ MachineBasicBlock *Succ = *B->succ_begin(); + + It = Succ->begin(); + B = Succ; + } while (true); +} + void SILowerControlFlow::emitEndCf(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); @@ -403,6 +442,18 @@ MachineInstr *Def = MRI.getUniqueVRegDef(CFMask); const DebugLoc &DL = MI.getDebugLoc(); + // If the only instruction immediately following this END_CF is another + // END_CF in the only successor, we can avoid emitting exec mask restore here. + auto Next = skipIgnoreExecInstsTrivialSucc(MBB, std::next(MI.getIterator())); + if (Next != MBB.end() && (Next->getOpcode() == AMDGPU::SI_END_CF || + LoweredEndCf.count(&*Next))) { + LLVM_DEBUG(dbgs() << "Skip redundant "; MI.dump()); + if (LIS) + LIS->RemoveMachineInstrFromMaps(MI); + MI.eraseFromParent(); + return; + } + MachineBasicBlock::iterator InsPt = Def && Def->getParent() == &MBB ? std::next(MachineBasicBlock::iterator(Def)) : MBB.begin(); @@ -410,6 +461,8 @@ .addReg(Exec) .add(MI.getOperand(0)); + LoweredEndCf.insert(NewMI); + if (LIS) LIS->ReplaceMachineInstrInMaps(MI, *NewMI); @@ -556,5 +609,7 @@ } } + LoweredEndCf.clear(); + return true; } Index: llvm/test/CodeGen/AMDGPU/collapse-endcf.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/collapse-endcf.ll +++ llvm/test/CodeGen/AMDGPU/collapse-endcf.ll @@ -1,10 +1,12 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-remove-redundant-endcf < %s | FileCheck -enable-var-scope -check-prefixes=GCN,ALL %s -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-opt-exec-mask-pre-ra=0 < %s | FileCheck -enable-var-scope -check-prefixes=DISABLED,ALL %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s -; ALL-LABEL: {{^}}simple_nested_if: +; GCN-LABEL: {{^}}simple_nested_if: ; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]] ; GCN-NEXT: 
s_cbranch_execz [[ENDIF:BB[0-9_]+]] -; GCN: s_and_b64 exec, exec, vcc + +; TODO: this does not need to save exec, just perform the and. +; GCN: s_and_saveexec_b64 s[{{[0-9:]+}}], vcc + ; GCN-NEXT: s_cbranch_execz [[ENDIF]] ; GCN-NEXT: ; %bb.{{[0-9]+}}: ; GCN: store_dword @@ -13,9 +15,6 @@ ; GCN: ds_write_b32 ; GCN: s_endpgm - -; DISABLED: s_or_b64 exec, exec -; DISABLED: s_or_b64 exec, exec define amdgpu_kernel void @simple_nested_if(i32 addrspace(1)* nocapture %arg) { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -39,7 +38,7 @@ ret void } -; ALL-LABEL: {{^}}uncollapsable_nested_if: +; GCN-LABEL: {{^}}uncollapsable_nested_if: ; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]] ; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:BB[0-9_]+]] ; GCN: s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]] @@ -82,7 +81,7 @@ ret void } -; ALL-LABEL: {{^}}nested_if_if_else: +; GCN-LABEL: {{^}}nested_if_if_else: ; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]] ; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:BB[0-9_]+]] ; GCN: s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]] @@ -128,7 +127,7 @@ ret void } -; ALL-LABEL: {{^}}nested_if_else_if: +; GCN-LABEL: {{^}}nested_if_else_if: ; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]] ; GCN-NEXT: s_xor_b64 [[SAVEEXEC_OUTER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_OUTER]] ; GCN-NEXT: s_cbranch_execz [[THEN_OUTER:BB[0-9_]+]] @@ -151,9 +150,9 @@ ; GCN-NEXT: ; %bb.{{[0-9]+}}: ; GCN: store_dword ; GCN-NEXT: [[FLOW1]]: -; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER_IF_OUTER_THEN]] -; GCN-NEXT: {{^}}[[ENDIF_OUTER]]: -; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER]] +; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER3]] +; GCN-NOT: s_or_b64 exec +; GCN-NOT: {{^.*:}} ; GCN: ds_write_b32 ; GCN: s_endpgm define amdgpu_kernel void @nested_if_else_if(i32 addrspace(1)* nocapture %arg) { @@ -191,7 +190,7 @@ ret void } -; ALL-LABEL: {{^}}s_endpgm_unsafe_barrier: +; GCN-LABEL: {{^}}s_endpgm_unsafe_barrier: ; GCN: s_and_saveexec_b64 
[[SAVEEXEC:s\[[0-9:]+\]]] ; GCN-NEXT: s_cbranch_execz [[ENDIF:BB[0-9_]+]] ; GCN-NEXT: ; %bb.{{[0-9]+}}: @@ -216,8 +215,7 @@ ret void } -; Make sure scc liveness is updated if sor_b64 is removed -; ALL-LABEL: {{^}}scc_liveness: +; GCN-LABEL: {{^}}scc_liveness: ; GCN: %bb10 ; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}} @@ -229,7 +227,9 @@ ; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]] ; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen -; GCN: s_and_b64 exec, exec, {{vcc|s\[[0-9:]+\]}} + +; TODO: this does not need to save exec, just perform the and. +; GCN: s_and_saveexec_b64 s[{{[0-9:]+}}], {{vcc|s\[[0-9:]+\]}} ; GCN-NOT: s_or_b64 exec, exec Index: llvm/test/CodeGen/AMDGPU/collapse-endcf.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/collapse-endcf.mir +++ llvm/test/CodeGen/AMDGPU/collapse-endcf.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass=si-optimize-exec-masking-pre-ra -amdgpu-remove-redundant-endcf %s -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass=si-lower-control-flow %s -o - | FileCheck -check-prefix=GCN %s # Make sure dbg_value doesn't change codeegn when collapsing end_cf --- @@ -20,37 +20,36 @@ ; GCN: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_U32_e64 1, [[COPY1]], implicit $exec ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_LT_U32_e64_]], implicit-def dead $scc - ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_]] - ; GCN: SI_MASK_BRANCH %bb.4, implicit $exec - ; GCN: S_BRANCH %bb.1 + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN: bb.1: ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 
0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) - ; GCN: undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec - ; GCN: %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec - ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1 - ; GCN: undef %8.sub0:vreg_64, %9:sreg_64_xexec = V_ADD_I32_e64 %5.sub0, %6.sub0, 0, implicit $exec - ; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec - ; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440 - ; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0 - ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: undef %4.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; GCN: undef %5.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec + ; GCN: %5.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec + ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %4.sub1 + ; GCN: undef %7.sub0:vreg_64, %8:sreg_64_xexec = V_ADD_I32_e64 %4.sub0, %5.sub0, 0, implicit $exec + ; GCN: %7.sub1:vreg_64, dead %9:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %8, 0, implicit $exec + ; GCN: %4.sub3:sgpr_128 = S_MOV_B32 61440 + ; GCN: %4.sub2:sgpr_128 = S_MOV_B32 0 + ; GCN: BUFFER_STORE_DWORD_ADDR64 %5.sub1, %5, %4, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec - ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc - ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_1]] - ; GCN: SI_MASK_BRANCH %bb.3, implicit $exec - ; GCN: S_BRANCH %bb.2 + ; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] + ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec ; GCN: bb.2: ; GCN: 
successors: %bb.3(0x80000000) - ; GCN: %5.sub0:sgpr_128 = COPY %5.sub2 - ; GCN: %5.sub1:sgpr_128 = COPY %5.sub2 + ; GCN: %4.sub0:sgpr_128 = COPY %4.sub2 + ; GCN: %4.sub1:sgpr_128 = COPY %4.sub2 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %7, %4, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: bb.3: ; GCN: successors: %bb.4(0x80000000) ; GCN: DBG_VALUE ; GCN: bb.4: - ; GCN: DBG_VALUE ; GCN: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc + ; GCN: DBG_VALUE ; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3, implicit $exec ; GCN: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GCN: $m0 = S_MOV_B32 -1 @@ -63,11 +62,7 @@ %1:sgpr_64 = COPY $sgpr0_sgpr1 %0:vgpr_32 = COPY $vgpr0 %2:sreg_64 = V_CMP_LT_U32_e64 1, %0, implicit $exec - %3:sreg_64 = COPY $exec, implicit-def $exec - %4:sreg_64 = S_AND_B64 %3, %2, implicit-def dead $scc - $exec = S_MOV_B64_term %4 - SI_MASK_BRANCH %bb.4, implicit $exec - S_BRANCH %bb.1 + %3:sreg_64 = SI_IF %2:sreg_64, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec bb.1: successors: %bb.2, %bb.3 @@ -82,11 +77,7 @@ %5.sub2:sgpr_128 = S_MOV_B32 0 BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) %11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec - %12:sreg_64 = COPY $exec, implicit-def $exec - %13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc - $exec = S_MOV_B64_term %13 - SI_MASK_BRANCH %bb.3, implicit $exec - S_BRANCH %bb.2 + %12:sreg_64 = SI_IF %11:sreg_64, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec bb.2: %5.sub0:sgpr_128 = COPY %5.sub2 @@ -95,12 +86,12 @@ BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) bb.3: - 
$exec = S_OR_B64 $exec, %12, implicit-def $scc + SI_END_CF %12:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec DBG_VALUE bb.4: DBG_VALUE - $exec = S_OR_B64 $exec, %3, implicit-def $scc + SI_END_CF %3:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec %15:vgpr_32 = V_MOV_B32_e32 3, implicit $exec %16:vgpr_32 = V_MOV_B32_e32 0, implicit $exec $m0 = S_MOV_B32 -1 @@ -128,31 +119,30 @@ ; GCN: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_U32_e64 1, [[COPY1]], implicit $exec ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_LT_U32_e64_]], implicit-def dead $scc - ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_]] - ; GCN: SI_MASK_BRANCH %bb.5, implicit $exec - ; GCN: S_BRANCH %bb.1 + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; GCN: S_CBRANCH_EXECZ %bb.5, implicit $exec ; GCN: bb.1: ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) - ; GCN: undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec - ; GCN: %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec - ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1 - ; GCN: undef %8.sub0:vreg_64, %9:sreg_64_xexec = V_ADD_I32_e64 %5.sub0, %6.sub0, 0, implicit $exec - ; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec - ; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440 - ; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0 - ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: undef %4.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; GCN: undef %5.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec + ; GCN: %5.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec + ; GCN: 
[[COPY3:%[0-9]+]]:vgpr_32 = COPY %4.sub1 + ; GCN: undef %7.sub0:vreg_64, %8:sreg_64_xexec = V_ADD_I32_e64 %4.sub0, %5.sub0, 0, implicit $exec + ; GCN: %7.sub1:vreg_64, dead %9:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %8, 0, implicit $exec + ; GCN: %4.sub3:sgpr_128 = S_MOV_B32 61440 + ; GCN: %4.sub2:sgpr_128 = S_MOV_B32 0 + ; GCN: BUFFER_STORE_DWORD_ADDR64 %5.sub1, %5, %4, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec - ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc - ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_1]] - ; GCN: SI_MASK_BRANCH %bb.3, implicit $exec - ; GCN: S_BRANCH %bb.2 + ; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] + ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec ; GCN: bb.2: ; GCN: successors: %bb.3(0x80000000) - ; GCN: %5.sub0:sgpr_128 = COPY %5.sub2 - ; GCN: %5.sub1:sgpr_128 = COPY %5.sub2 + ; GCN: %4.sub0:sgpr_128 = COPY %4.sub2 + ; GCN: %4.sub1:sgpr_128 = COPY %4.sub2 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %7, %4, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: bb.3: ; GCN: successors: %bb.4(0x80000000) ; GCN: bb.4: @@ -171,11 +161,7 @@ %1:sgpr_64 = COPY $sgpr0_sgpr1 %0:vgpr_32 = COPY $vgpr0 %2:sreg_64 = V_CMP_LT_U32_e64 1, %0, implicit $exec - %3:sreg_64 = COPY $exec, implicit-def $exec - %4:sreg_64 = S_AND_B64 %3, %2, implicit-def dead $scc - $exec = S_MOV_B64_term %4 - SI_MASK_BRANCH %bb.4, implicit $exec - S_BRANCH %bb.1 + %3:sreg_64 = SI_IF %2:sreg_64, %bb.4, 
implicit-def dead $exec, implicit-def dead $scc, implicit $exec bb.1: successors: %bb.2, %bb.3 @@ -190,11 +176,7 @@ %5.sub2:sgpr_128 = S_MOV_B32 0 BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) %11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec - %12:sreg_64 = COPY $exec, implicit-def $exec - %13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc - $exec = S_MOV_B64_term %13 - SI_MASK_BRANCH %bb.3, implicit $exec - S_BRANCH %bb.2 + %12:sreg_64 = SI_IF %11:sreg_64, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec bb.2: %5.sub0:sgpr_128 = COPY %5.sub2 @@ -203,12 +185,12 @@ BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) bb.3: - $exec = S_OR_B64 $exec, %12, implicit-def $scc + SI_END_CF %12:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec bb.5: bb.4: - $exec = S_OR_B64 $exec, %3, implicit-def $scc + SI_END_CF %3:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec %15:vgpr_32 = V_MOV_B32_e32 3, implicit $exec %16:vgpr_32 = V_MOV_B32_e32 0, implicit $exec $m0 = S_MOV_B32 -1 @@ -236,31 +218,30 @@ ; GCN: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_U32_e64 1, [[COPY1]], implicit $exec ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_LT_U32_e64_]], implicit-def dead $scc - ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_]] - ; GCN: SI_MASK_BRANCH %bb.5, implicit $exec - ; GCN: S_BRANCH %bb.1 + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; GCN: S_CBRANCH_EXECZ %bb.5, implicit $exec ; GCN: bb.1: ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) - ; GCN: undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec - ; GCN: %6.sub1:vreg_64 = V_MOV_B32_e32 
0, implicit $exec - ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1 - ; GCN: undef %8.sub0:vreg_64, %9:sreg_64_xexec = V_ADD_I32_e64 %5.sub0, %6.sub0, 0, implicit $exec - ; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec - ; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440 - ; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0 - ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: undef %4.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; GCN: undef %5.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec + ; GCN: %5.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec + ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %4.sub1 + ; GCN: undef %7.sub0:vreg_64, %8:sreg_64_xexec = V_ADD_I32_e64 %4.sub0, %5.sub0, 0, implicit $exec + ; GCN: %7.sub1:vreg_64, dead %9:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %8, 0, implicit $exec + ; GCN: %4.sub3:sgpr_128 = S_MOV_B32 61440 + ; GCN: %4.sub2:sgpr_128 = S_MOV_B32 0 + ; GCN: BUFFER_STORE_DWORD_ADDR64 %5.sub1, %5, %4, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec - ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc - ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_1]] - ; GCN: SI_MASK_BRANCH %bb.3, implicit $exec - ; GCN: S_BRANCH %bb.2 + ; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] + ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec ; GCN: bb.2: ; GCN: successors: %bb.3(0x80000000) - ; GCN: %5.sub0:sgpr_128 = COPY %5.sub2 - ; GCN: %5.sub1:sgpr_128 = COPY %5.sub2 + ; GCN: %4.sub0:sgpr_128 = COPY %4.sub2 + ; GCN: %4.sub1:sgpr_128 = COPY 
%4.sub2 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %7, %4, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: bb.3: ; GCN: successors: %bb.4(0x80000000) ; GCN: bb.4: @@ -280,11 +261,7 @@ %1:sgpr_64 = COPY $sgpr0_sgpr1 %0:vgpr_32 = COPY $vgpr0 %2:sreg_64 = V_CMP_LT_U32_e64 1, %0, implicit $exec - %3:sreg_64 = COPY $exec, implicit-def $exec - %4:sreg_64 = S_AND_B64 %3, %2, implicit-def dead $scc - $exec = S_MOV_B64_term %4 - SI_MASK_BRANCH %bb.4, implicit $exec - S_BRANCH %bb.1 + %3:sreg_64 = SI_IF %2:sreg_64, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec bb.1: successors: %bb.2, %bb.3 @@ -299,11 +276,7 @@ %5.sub2:sgpr_128 = S_MOV_B32 0 BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) %11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec - %12:sreg_64 = COPY $exec, implicit-def $exec - %13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc - $exec = S_MOV_B64_term %13 - SI_MASK_BRANCH %bb.3, implicit $exec - S_BRANCH %bb.2 + %12:sreg_64 = SI_IF %11:sreg_64, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec bb.2: %5.sub0:sgpr_128 = COPY %5.sub2 @@ -312,13 +285,13 @@ BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) bb.3: - $exec = S_OR_B64 $exec, %12, implicit-def $scc + SI_END_CF %12:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec bb.5: DBG_VALUE bb.4: - $exec = S_OR_B64 $exec, %3, implicit-def $scc + SI_END_CF %3:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec %15:vgpr_32 = V_MOV_B32_e32 3, implicit $exec %16:vgpr_32 = V_MOV_B32_e32 0, implicit $exec $m0 = S_MOV_B32 -1 @@ -345,38 +318,35 @@ ; GCN: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_64 = 
V_CMP_LT_U32_e64 1, [[COPY1]], implicit $exec ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_LT_U32_e64_]], implicit-def dead $scc - ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_]] - ; GCN: SI_MASK_BRANCH %bb.4, implicit $exec - ; GCN: S_BRANCH %bb.1 + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN: bb.1: ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) - ; GCN: undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec - ; GCN: %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec - ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1 - ; GCN: undef %8.sub0:vreg_64, %9:sreg_64_xexec = V_ADD_I32_e64 %5.sub0, %6.sub0, 0, implicit $exec - ; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec - ; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440 - ; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0 - ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: undef %4.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; GCN: undef %5.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec + ; GCN: %5.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec + ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %4.sub1 + ; GCN: undef %7.sub0:vreg_64, %8:sreg_64_xexec = V_ADD_I32_e64 %4.sub0, %5.sub0, 0, implicit $exec + ; GCN: %7.sub1:vreg_64, dead %9:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %8, 0, implicit $exec + ; GCN: %4.sub3:sgpr_128 = S_MOV_B32 61440 + ; GCN: %4.sub2:sgpr_128 = S_MOV_B32 0 + ; GCN: BUFFER_STORE_DWORD_ADDR64 %5.sub1, %5, %4, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: 
[[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec ; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc - ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_1]] - ; GCN: SI_MASK_BRANCH %bb.3, implicit $exec - ; GCN: S_BRANCH %bb.2 + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] + ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec ; GCN: bb.2: ; GCN: successors: %bb.3(0x80000000) - ; GCN: %5.sub0:sgpr_128 = COPY %5.sub2 - ; GCN: %5.sub1:sgpr_128 = COPY %5.sub2 + ; GCN: %4.sub0:sgpr_128 = COPY %4.sub2 + ; GCN: %4.sub1:sgpr_128 = COPY %4.sub2 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %7, %4, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: bb.3: ; GCN: successors: %bb.4(0x80000000) ; GCN: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; GCN: dead %16:sgpr_32 = S_BREV_B32 [[DEF]] + ; GCN: [[S_BREV_B32_:%[0-9]+]]:sgpr_32 = S_BREV_B32 [[DEF]] ; GCN: KILL [[DEF]] - ; GCN: $exec = S_OR_B64 $exec, [[COPY4]], implicit-def $scc ; GCN: bb.4: ; GCN: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc ; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3, implicit $exec @@ -391,11 +361,7 @@ %1:sgpr_64 = COPY $sgpr0_sgpr1 %0:vgpr_32 = COPY $vgpr0 %2:sreg_64 = V_CMP_LT_U32_e64 1, %0, implicit $exec - %3:sreg_64 = COPY $exec, implicit-def $exec - %4:sreg_64 = S_AND_B64 %3, %2, implicit-def dead $scc - $exec = S_MOV_B64_term %4 - SI_MASK_BRANCH %bb.4, implicit $exec - S_BRANCH %bb.1 + %3:sreg_64 = SI_IF %2:sreg_64, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec bb.1: successors: %bb.2, %bb.3 @@ -410,11 +376,7 @@ %5.sub2:sgpr_128 = S_MOV_B32 0 BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, 
%5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) %11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec - %12:sreg_64 = COPY $exec, implicit-def $exec - %13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc - $exec = S_MOV_B64_term %13 - SI_MASK_BRANCH %bb.3, implicit $exec - S_BRANCH %bb.2 + %12:sreg_64 = SI_IF %11:sreg_64, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec bb.2: %5.sub0:sgpr_128 = COPY %5.sub2 @@ -426,10 +388,10 @@ %15:sgpr_32 = IMPLICIT_DEF %16:sgpr_32 = S_BREV_B32 %15 KILL %15 - $exec = S_OR_B64 $exec, %12, implicit-def $scc + SI_END_CF %12:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec bb.4: - $exec = S_OR_B64 $exec, %3, implicit-def $scc + SI_END_CF %3:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec %17:vgpr_32 = V_MOV_B32_e32 3, implicit $exec %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec $m0 = S_MOV_B32 -1 @@ -458,37 +420,36 @@ ; GCN: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_U32_e64 1, [[COPY1]], implicit $exec ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_LT_U32_e64_]], implicit-def dead $scc - ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_]] - ; GCN: SI_MASK_BRANCH %bb.4, implicit $exec - ; GCN: S_BRANCH %bb.1 + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN: bb.1: ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) - ; GCN: undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec - ; GCN: %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec - ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1 - ; GCN: undef %8.sub0:vreg_64, %9:sreg_64_xexec = V_ADD_I32_e64 %5.sub0, %6.sub0, 0, implicit $exec - ; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 
0, [[COPY3]], %9, 0, implicit $exec - ; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440 - ; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0 - ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: undef %4.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; GCN: undef %5.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec + ; GCN: %5.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec + ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %4.sub1 + ; GCN: undef %7.sub0:vreg_64, %8:sreg_64_xexec = V_ADD_I32_e64 %4.sub0, %5.sub0, 0, implicit $exec + ; GCN: %7.sub1:vreg_64, dead %9:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %8, 0, implicit $exec + ; GCN: %4.sub3:sgpr_128 = S_MOV_B32 61440 + ; GCN: %4.sub2:sgpr_128 = S_MOV_B32 0 + ; GCN: BUFFER_STORE_DWORD_ADDR64 %5.sub1, %5, %4, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec - ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc - ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_1]] - ; GCN: SI_MASK_BRANCH %bb.3, implicit $exec - ; GCN: S_BRANCH %bb.2 + ; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] + ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec ; GCN: bb.2: ; GCN: successors: %bb.3(0x80000000) - ; GCN: %5.sub0:sgpr_128 = COPY %5.sub2 - ; GCN: %5.sub1:sgpr_128 = COPY %5.sub2 + ; GCN: %4.sub0:sgpr_128 = COPY %4.sub2 + ; GCN: %4.sub1:sgpr_128 = COPY %4.sub2 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: 
BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %7, %4, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: bb.3: ; GCN: successors: %bb.4(0x80000000) ; GCN: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; GCN: [[S_BREV_B32_:%[0-9]+]]:sgpr_32 = S_BREV_B32 [[DEF]] ; GCN: KILL [[DEF]] - ; GCN: dead %17:sgpr_32 = COPY [[S_BREV_B32_]] + ; GCN: [[COPY5:%[0-9]+]]:sgpr_32 = COPY [[S_BREV_B32_]] ; GCN: bb.4: ; GCN: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc ; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3, implicit $exec @@ -503,11 +464,7 @@ %1:sgpr_64 = COPY $sgpr0_sgpr1 %0:vgpr_32 = COPY $vgpr0 %2:sreg_64 = V_CMP_LT_U32_e64 1, %0, implicit $exec - %3:sreg_64 = COPY $exec, implicit-def $exec - %4:sreg_64 = S_AND_B64 %3, %2, implicit-def dead $scc - $exec = S_MOV_B64_term %4 - SI_MASK_BRANCH %bb.4, implicit $exec - S_BRANCH %bb.1 + %3:sreg_64 = SI_IF %2:sreg_64, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec bb.1: successors: %bb.2, %bb.3 @@ -522,11 +479,7 @@ %5.sub2:sgpr_128 = S_MOV_B32 0 BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) %11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec - %12:sreg_64 = COPY $exec, implicit-def $exec - %13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc - $exec = S_MOV_B64_term %13 - SI_MASK_BRANCH %bb.3, implicit $exec - S_BRANCH %bb.2 + %12:sreg_64 = SI_IF %11:sreg_64, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec bb.2: %5.sub0:sgpr_128 = COPY %5.sub2 @@ -535,14 +488,14 @@ BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) bb.3: - $exec = S_OR_B64 $exec, %12, implicit-def $scc + SI_END_CF %12:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec %15:sgpr_32 = IMPLICIT_DEF %16:sgpr_32 = S_BREV_B32 %15 KILL %15 %19:sgpr_32 = COPY %16 bb.4: - $exec = S_OR_B64 $exec, %3, implicit-def $scc + SI_END_CF %3:sreg_64, implicit-def dead 
$exec, implicit-def dead $scc, implicit $exec %17:vgpr_32 = V_MOV_B32_e32 3, implicit $exec %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec $m0 = S_MOV_B32 -1 @@ -570,36 +523,34 @@ ; GCN: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_U32_e64 1, [[COPY1]], implicit $exec ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_LT_U32_e64_]], implicit-def dead $scc - ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_]] - ; GCN: SI_MASK_BRANCH %bb.4, implicit $exec - ; GCN: S_BRANCH %bb.1 + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN: bb.1: ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) - ; GCN: undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec - ; GCN: %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec - ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1 - ; GCN: undef %8.sub0:vreg_64, %9:sreg_64_xexec = V_ADD_I32_e64 %5.sub0, %6.sub0, 0, implicit $exec - ; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec - ; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440 - ; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0 - ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: undef %4.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; GCN: undef %5.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec + ; GCN: %5.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec + ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %4.sub1 + ; GCN: undef %7.sub0:vreg_64, %8:sreg_64_xexec = V_ADD_I32_e64 %4.sub0, %5.sub0, 0, implicit $exec + ; GCN: %7.sub1:vreg_64, dead %9:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %8, 0, implicit $exec + ; GCN: 
%4.sub3:sgpr_128 = S_MOV_B32 61440 + ; GCN: %4.sub2:sgpr_128 = S_MOV_B32 0 + ; GCN: BUFFER_STORE_DWORD_ADDR64 %5.sub1, %5, %4, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec ; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc - ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_1]] - ; GCN: SI_MASK_BRANCH %bb.3, implicit $exec - ; GCN: S_BRANCH %bb.2 + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] + ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec ; GCN: bb.2: ; GCN: successors: %bb.3(0x80000000) - ; GCN: %5.sub0:sgpr_128 = COPY %5.sub2 - ; GCN: %5.sub1:sgpr_128 = COPY %5.sub2 + ; GCN: %4.sub0:sgpr_128 = COPY %4.sub2 + ; GCN: %4.sub1:sgpr_128 = COPY %4.sub2 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %7, %4, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: bb.3: ; GCN: successors: %bb.4(0x80000000) ; GCN: $exec = S_OR_B64 $exec, [[COPY4]], implicit-def $scc - ; GCN: dead %15:sreg_64 = S_BREV_B64 $exec + ; GCN: [[S_BREV_B64_:%[0-9]+]]:sreg_64 = S_BREV_B64 $exec ; GCN: bb.4: ; GCN: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc ; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3, implicit $exec @@ -614,11 +565,7 @@ %1:sgpr_64 = COPY $sgpr0_sgpr1 %0:vgpr_32 = COPY $vgpr0 %2:sreg_64 = V_CMP_LT_U32_e64 1, %0, implicit $exec - %3:sreg_64 = COPY $exec, implicit-def $exec - %4:sreg_64 = S_AND_B64 %3, %2, implicit-def dead $scc - $exec = S_MOV_B64_term %4 - SI_MASK_BRANCH %bb.4, implicit $exec - S_BRANCH %bb.1 + %3:sreg_64 = SI_IF %2:sreg_64, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec 
bb.1: successors: %bb.2, %bb.3 @@ -633,11 +580,7 @@ %5.sub2:sgpr_128 = S_MOV_B32 0 BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) %11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec - %12:sreg_64 = COPY $exec, implicit-def $exec - %13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc - $exec = S_MOV_B64_term %13 - SI_MASK_BRANCH %bb.3, implicit $exec - S_BRANCH %bb.2 + %12:sreg_64 = SI_IF %11:sreg_64, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec bb.2: %5.sub0:sgpr_128 = COPY %5.sub2 @@ -646,11 +589,11 @@ BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) bb.3: - $exec = S_OR_B64 $exec, %12, implicit-def $scc + SI_END_CF %12:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec %15:sreg_64 = S_BREV_B64 $exec bb.4: - $exec = S_OR_B64 $exec, %3, implicit-def $scc + SI_END_CF %3:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec %17:vgpr_32 = V_MOV_B32_e32 3, implicit $exec %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec $m0 = S_MOV_B32 -1 @@ -678,36 +621,34 @@ ; GCN: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_U32_e64 1, [[COPY1]], implicit $exec ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_LT_U32_e64_]], implicit-def dead $scc - ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_]] - ; GCN: SI_MASK_BRANCH %bb.4, implicit $exec - ; GCN: S_BRANCH %bb.1 + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN: bb.1: ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) - ; GCN: undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec - ; GCN: %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec - ; GCN: 
[[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1 - ; GCN: undef %8.sub0:vreg_64, %9:sreg_64_xexec = V_ADD_I32_e64 %5.sub0, %6.sub0, 0, implicit $exec - ; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec - ; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440 - ; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0 - ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: undef %4.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; GCN: undef %5.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec + ; GCN: %5.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec + ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %4.sub1 + ; GCN: undef %7.sub0:vreg_64, %8:sreg_64_xexec = V_ADD_I32_e64 %4.sub0, %5.sub0, 0, implicit $exec + ; GCN: %7.sub1:vreg_64, dead %9:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %8, 0, implicit $exec + ; GCN: %4.sub3:sgpr_128 = S_MOV_B32 61440 + ; GCN: %4.sub2:sgpr_128 = S_MOV_B32 0 + ; GCN: BUFFER_STORE_DWORD_ADDR64 %5.sub1, %5, %4, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec ; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc - ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_1]] - ; GCN: SI_MASK_BRANCH %bb.3, implicit $exec - ; GCN: S_BRANCH %bb.2 + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] + ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec ; GCN: bb.2: ; GCN: successors: %bb.3(0x80000000) - ; GCN: %5.sub0:sgpr_128 = COPY %5.sub2 - ; GCN: %5.sub1:sgpr_128 = COPY %5.sub2 + ; GCN: %4.sub0:sgpr_128 = COPY %4.sub2 + ; GCN: %4.sub1:sgpr_128 = COPY %4.sub2 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], 
%8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %7, %4, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: bb.3: ; GCN: successors: %bb.4(0x80000000) ; GCN: $exec = S_OR_B64 $exec, [[COPY4]], implicit-def $scc - ; GCN: dead %15:vgpr_32 = COPY %5.sub2 + ; GCN: [[COPY5:%[0-9]+]]:vgpr_32 = COPY %4.sub2 ; GCN: bb.4: ; GCN: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc ; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3, implicit $exec @@ -722,11 +663,7 @@ %1:sgpr_64 = COPY $sgpr0_sgpr1 %0:vgpr_32 = COPY $vgpr0 %2:sreg_64 = V_CMP_LT_U32_e64 1, %0, implicit $exec - %3:sreg_64 = COPY $exec, implicit-def $exec - %4:sreg_64 = S_AND_B64 %3, %2, implicit-def dead $scc - $exec = S_MOV_B64_term %4 - SI_MASK_BRANCH %bb.4, implicit $exec - S_BRANCH %bb.1 + %3:sreg_64 = SI_IF %2:sreg_64, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec bb.1: successors: %bb.2, %bb.3 @@ -741,11 +678,7 @@ %5.sub2:sgpr_128 = S_MOV_B32 0 BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) %11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec - %12:sreg_64 = COPY $exec, implicit-def $exec - %13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc - $exec = S_MOV_B64_term %13 - SI_MASK_BRANCH %bb.3, implicit $exec - S_BRANCH %bb.2 + %12:sreg_64 = SI_IF %11:sreg_64, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec bb.2: %5.sub0:sgpr_128 = COPY %5.sub2 @@ -754,11 +687,11 @@ BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) bb.3: - $exec = S_OR_B64 $exec, %12, implicit-def $scc + SI_END_CF %12:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec %15:vgpr_32 = COPY %5.sub2 bb.4: - $exec = S_OR_B64 $exec, %3, implicit-def $scc + SI_END_CF %3:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec %17:vgpr_32 = 
V_MOV_B32_e32 3, implicit $exec %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec $m0 = S_MOV_B32 -1 @@ -767,7 +700,6 @@ ... -# There's no real reason this can't be handled, but isn't now. --- name: simple_nested_if_not_layout_successor tracksRegLiveness: true @@ -786,35 +718,32 @@ ; GCN: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_U32_e64 1, [[COPY1]], implicit $exec ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_LT_U32_e64_]], implicit-def dead $scc - ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_]] - ; GCN: SI_MASK_BRANCH %bb.4, implicit $exec - ; GCN: S_BRANCH %bb.1 + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN: bb.1: ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) - ; GCN: undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec - ; GCN: %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec - ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1 - ; GCN: undef %8.sub0:vreg_64, %9:sreg_64_xexec = V_ADD_I32_e64 %5.sub0, %6.sub0, 0, implicit $exec - ; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec - ; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440 - ; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0 - ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: undef %4.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; GCN: undef %5.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec + ; GCN: %5.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec + ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %4.sub1 + ; GCN: undef %7.sub0:vreg_64, %8:sreg_64_xexec = V_ADD_I32_e64 %4.sub0, %5.sub0, 0, implicit $exec + ; GCN: 
%7.sub1:vreg_64, dead %9:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %8, 0, implicit $exec + ; GCN: %4.sub3:sgpr_128 = S_MOV_B32 61440 + ; GCN: %4.sub2:sgpr_128 = S_MOV_B32 0 + ; GCN: BUFFER_STORE_DWORD_ADDR64 %5.sub1, %5, %4, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec ; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc - ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_1]] - ; GCN: SI_MASK_BRANCH %bb.3, implicit $exec - ; GCN: S_BRANCH %bb.2 + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] + ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec ; GCN: bb.2: ; GCN: successors: %bb.3(0x80000000) - ; GCN: %5.sub0:sgpr_128 = COPY %5.sub2 - ; GCN: %5.sub1:sgpr_128 = COPY %5.sub2 + ; GCN: %4.sub0:sgpr_128 = COPY %4.sub2 + ; GCN: %4.sub1:sgpr_128 = COPY %4.sub2 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %7, %4, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: bb.3: ; GCN: successors: %bb.5(0x80000000) - ; GCN: $exec = S_OR_B64 $exec, [[COPY4]], implicit-def $scc ; GCN: S_BRANCH %bb.5 ; GCN: bb.4: ; GCN: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc @@ -833,11 +762,7 @@ %1:sgpr_64 = COPY $sgpr0_sgpr1 %0:vgpr_32 = COPY $vgpr0 %2:sreg_64 = V_CMP_LT_U32_e64 1, %0, implicit $exec - %3:sreg_64 = COPY $exec, implicit-def $exec - %4:sreg_64 = S_AND_B64 %3, %2, implicit-def dead $scc - $exec = S_MOV_B64_term %4 - SI_MASK_BRANCH %bb.4, implicit $exec - S_BRANCH %bb.1 + %3:sreg_64 = SI_IF %2:sreg_64, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec bb.1: successors: %bb.2, %bb.3 @@ -852,11 +777,7 @@ 
%5.sub2:sgpr_128 = S_MOV_B32 0 BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) %11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec - %12:sreg_64 = COPY $exec, implicit-def $exec - %13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc - $exec = S_MOV_B64_term %13 - SI_MASK_BRANCH %bb.3, implicit $exec - S_BRANCH %bb.2 + %12:sreg_64 = SI_IF %11:sreg_64, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec bb.2: %5.sub0:sgpr_128 = COPY %5.sub2 @@ -865,11 +786,11 @@ BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) bb.3: - $exec = S_OR_B64 $exec, %12, implicit-def $scc + SI_END_CF %12:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec S_BRANCH %bb.5 bb.4: - $exec = S_OR_B64 $exec, %3, implicit-def $scc + SI_END_CF %3:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec %15:vgpr_32 = V_MOV_B32_e32 3, implicit $exec %16:vgpr_32 = V_MOV_B32_e32 0, implicit $exec $m0 = S_MOV_B32 -1 @@ -880,3 +801,36 @@ S_BRANCH %bb.4 ... 
+ +# This should never happen, but check that we do not hang anyway. + +--- +name: recursive_endcf +tracksRegLiveness: true +liveins: + - { reg: '$vgpr0', virtual-reg: '%0' } +machineFunctionInfo: + isEntryFunction: true +body: | + ; GCN-LABEL: name: recursive_endcf + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_U32_e64 1, [[COPY]], implicit $exec + ; GCN: bb.1: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: $exec = S_OR_B64 $exec, [[V_CMP_LT_U32_e64_]], implicit-def $scc + ; GCN: S_BRANCH %bb.1 + bb.0: + successors: %bb.1 + liveins: $vgpr0 + + %0:vgpr_32 = COPY $vgpr0 + %2:sreg_64 = V_CMP_LT_U32_e64 1, %0, implicit $exec + + bb.1: + successors: %bb.1 + + SI_END_CF %2:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.1 Index: llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll +++ llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll @@ -58,7 +58,7 @@ ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v5 ; GFX9-NEXT: v_cmp_lt_u32_e64 s[4:5], v0, v1 ; GFX9-NEXT: s_and_saveexec_b64 s[10:11], s[4:5] -; GFX9-NEXT: s_cbranch_execz BB1_4 +; GFX9-NEXT: s_cbranch_execz BB1_3 ; GFX9-NEXT: ; %bb.1: ; %bb19 ; GFX9-NEXT: v_cvt_f32_u32_e32 v7, v6 ; GFX9-NEXT: v_and_b32_e32 v5, 0xffffff, v6 @@ -100,9 +100,7 @@ ; GFX9-NEXT: v_add_u32_e32 v3, v3, v6 ; GFX9-NEXT: s_andn2_b64 exec, exec, s[12:13] ; GFX9-NEXT: s_cbranch_execnz BB1_2 -; GFX9-NEXT: ; %bb.3: ; %Flow -; GFX9-NEXT: s_or_b64 exec, exec, s[12:13] -; GFX9-NEXT: BB1_4: ; %Flow3 +; GFX9-NEXT: BB1_3: ; %Flow3 ; GFX9-NEXT: s_or_b64 exec, exec, s[10:11] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31]