Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2278,22 +2278,30 @@
 // Return the use branch instruction, otherwise null if the usage is invalid.
 static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
                                        MachineRegisterInfo &MRI,
-                                       MachineInstr *&Br) {
+                                       MachineInstr *&Br,
+                                       MachineBasicBlock *&UncondBrTarget) {
   Register CondDef = MI.getOperand(0).getReg();
   if (!MRI.hasOneNonDBGUse(CondDef))
     return nullptr;
 
+  MachineBasicBlock *Parent = MI.getParent();
   MachineInstr &UseMI = *MRI.use_instr_nodbg_begin(CondDef);
-  if (UseMI.getParent() != MI.getParent() ||
+  if (UseMI.getParent() != Parent ||
       UseMI.getOpcode() != AMDGPU::G_BRCOND)
     return nullptr;
 
-  // Make sure the cond br is followed by a G_BR
+  // Make sure the cond br is followed by a G_BR, or is the last instruction.
   MachineBasicBlock::iterator Next = std::next(UseMI.getIterator());
-  if (Next != MI.getParent()->end()) {
+  if (Next == Parent->end()) {
+    MachineFunction::iterator NextMBB = std::next(Parent->getIterator());
+    if (NextMBB == Parent->getParent()->end()) // Illegal intrinsic use.
+      return nullptr;
+    UncondBrTarget = &*NextMBB;
+  } else {
     if (Next->getOpcode() != AMDGPU::G_BR)
       return nullptr;
     Br = &*Next;
+    UncondBrTarget = Br->getOperand(0).getMBB();
   }
 
   return &UseMI;
@@ -4110,7 +4118,8 @@
   case Intrinsic::amdgcn_if:
   case Intrinsic::amdgcn_else: {
     MachineInstr *Br = nullptr;
-    if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br)) {
+    MachineBasicBlock *UncondBrTarget = nullptr;
+    if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br, UncondBrTarget)) {
       const SIRegisterInfo *TRI
         = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
 
@@ -4118,25 +4127,28 @@
       Register Def = MI.getOperand(1).getReg();
       Register Use = MI.getOperand(3).getReg();
 
-      MachineBasicBlock *BrTarget = BrCond->getOperand(1).getMBB();
-      if (Br)
-        BrTarget = Br->getOperand(0).getMBB();
-
+      MachineBasicBlock *CondBrTarget = BrCond->getOperand(1).getMBB();
       if (IntrID == Intrinsic::amdgcn_if) {
         B.buildInstr(AMDGPU::SI_IF)
           .addDef(Def)
           .addUse(Use)
-          .addMBB(BrTarget);
+          .addMBB(UncondBrTarget);
       } else {
         B.buildInstr(AMDGPU::SI_ELSE)
          .addDef(Def)
          .addUse(Use)
-         .addMBB(BrTarget)
+         .addMBB(UncondBrTarget)
          .addImm(0);
       }
 
-      if (Br)
-        Br->getOperand(0).setMBB(BrCond->getOperand(1).getMBB());
+      if (Br) {
+        Br->getOperand(0).setMBB(CondBrTarget);
+      } else {
+        // The IRTranslator skips inserting the G_BR for fallthrough cases, but
+        // since we're swapping branch targets it needs to be reinserted.
+        // FIXME: IRTranslator should probably not do this
+        B.buildBr(*CondBrTarget);
+      }
 
       MRI.setRegClass(Def, TRI->getWaveMaskRegClass());
       MRI.setRegClass(Use, TRI->getWaveMaskRegClass());
@@ -4149,23 +4161,23 @@
   }
   case Intrinsic::amdgcn_loop: {
     MachineInstr *Br = nullptr;
-    if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br)) {
+    MachineBasicBlock *UncondBrTarget = nullptr;
+    if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br, UncondBrTarget)) {
       const SIRegisterInfo *TRI
        = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
 
      B.setInstr(*BrCond);
-     MachineBasicBlock *BrTarget = BrCond->getOperand(1).getMBB();
-     if (Br)
-       BrTarget = Br->getOperand(0).getMBB();
-
+     MachineBasicBlock *CondBrTarget = BrCond->getOperand(1).getMBB();
      Register Reg = MI.getOperand(2).getReg();
      B.buildInstr(AMDGPU::SI_LOOP)
        .addUse(Reg)
-       .addMBB(BrTarget);
+       .addMBB(UncondBrTarget);
 
      if (Br)
-       Br->getOperand(0).setMBB(BrCond->getOperand(1).getMBB());
+       Br->getOperand(0).setMBB(CondBrTarget);
+     else
+       B.buildBr(*CondBrTarget);
 
      MI.eraseFromParent();
      BrCond->eraseFromParent();
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4516,6 +4516,7 @@
   } else {
     // Get the target from BR if we don't negate the condition
     BR = findUser(BRCOND, ISD::BR);
+    assert(BR && "brcond missing unconditional branch user");
     Target = BR->getOperand(1);
   }
 
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
@@ -37,7 +37,7 @@
 ; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
 ; CHECK-NEXT:    ; implicit-def: $vgpr0
 ; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; CHECK-NEXT:    s_cbranch_execnz BB1_2
+; CHECK-NEXT:    s_cbranch_execz BB1_2
 ; CHECK-NEXT:  ; %bb.1: ; %if.true
 ; CHECK-NEXT:    global_load_dword v0, v[0:1], off
 ; CHECK-NEXT:  BB1_2: ; %endif
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if.xfail.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if.xfail.mir
@@ -0,0 +1,21 @@
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o /dev/null 2>&1 | FileCheck -check-prefix=ERR %s
+
+# Make sure there's no crash if there is somehow no successor block.
+
+# ERR: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_no_succ_block)
+
+---
+name: brcond_si_if_no_succ_block
+body: |
+  bb.0:
+    S_NOP 0
+
+  bb.1:
+    successors: %bb.1
+    liveins: $vgpr0, $vgpr1
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s1) = G_ICMP intpred(ne), %0, %1
+    %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
+    G_BRCOND %3, %bb.1
+...
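
verifyCFIntrinsic now accepts both shapes the IRTranslator can produce for the control-flow intrinsics and reports the unconditional successor either way. A rough sketch of the two input forms and the common output (block numbers and register names here are illustrative only, not taken from the tests in this patch):

    # Form 1: G_BRCOND followed by an explicit G_BR.
    bb.0:
      %cond:_(s1), %mask:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %src:_(s1)
      G_BRCOND %cond(s1), %bb.1   ; CondBrTarget = %bb.1
      G_BR %bb.2                  ; Br != null, UncondBrTarget = %bb.2

    # Form 2: G_BRCOND is the final instruction and the block falls through;
    # UncondBrTarget is taken to be the layout successor (here %bb.2), and no
    # G_BR to it exists yet.

    # Output for either form: the wave-mask pseudo carries UncondBrTarget,
    # and an explicit G_BR to CondBrTarget follows (the existing G_BR is
    # retargeted in form 1; a new one is built in form 2).
    bb.0:
      %mask:sreg_64_xexec(s64) = SI_IF %src(s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
      G_BR %bb.1
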
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir
@@ -108,6 +108,7 @@
   ; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
   ; WAVE64: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
   ; WAVE64: [[SI_IF:%[0-9]+]]:sreg_64_xexec(s64) = SI_IF [[ICMP]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE64: G_BR %bb.1
   ; WAVE64: bb.1:
   ; WAVE32-LABEL: name: brcond_si_if
   ; WAVE32: bb.0:
@@ -116,6 +117,7 @@
   ; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
   ; WAVE32: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
   ; WAVE32: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s64) = SI_IF [[ICMP]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE32: G_BR %bb.1
   ; WAVE32: bb.1:
   bb.0:
     successors: %bb.1
@@ -139,6 +141,7 @@
   ; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
   ; WAVE64: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
   ; WAVE64: [[SI_ELSE:%[0-9]+]]:sreg_64_xexec(s64) = SI_ELSE [[ICMP]](s1), %bb.1, 0, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE64: G_BR %bb.1
   ; WAVE64: bb.1:
   ; WAVE32-LABEL: name: brcond_si_else
   ; WAVE32: bb.0:
@@ -147,6 +150,7 @@
   ; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
   ; WAVE32: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
   ; WAVE32: [[SI_ELSE:%[0-9]+]]:sreg_32_xm0_xexec(s64) = SI_ELSE [[ICMP]](s1), %bb.1, 0, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE32: G_BR %bb.1
   ; WAVE32: bb.1:
   bb.0:
     successors: %bb.1
@@ -161,32 +165,148 @@
 ...
 ---
-name: brcond_si_loop
+name: brcond_si_loop_brcond
+tracksRegLiveness: true
 body: |
-  ; WAVE64-LABEL: name: brcond_si_loop
+  ; WAVE64-LABEL: name: brcond_si_loop_brcond
   ; WAVE64: bb.0:
   ; WAVE64: successors: %bb.1(0x80000000)
+  ; WAVE64: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
   ; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
   ; WAVE64: [[COPY2:%[0-9]+]]:sreg_64_xexec(s64) = COPY $sgpr0_sgpr1
-  ; WAVE64: SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
   ; WAVE64: bb.1:
-  ; WAVE32-LABEL: name: brcond_si_loop
+  ; WAVE64: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; WAVE64: S_NOP 0
+  ; WAVE64: SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE64: G_BR %bb.2
+  ; WAVE64: bb.2:
+  ; WAVE64: S_NOP 0
+  ; WAVE32-LABEL: name: brcond_si_loop_brcond
   ; WAVE32: bb.0:
   ; WAVE32: successors: %bb.1(0x80000000)
+  ; WAVE32: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
   ; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
   ; WAVE32: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(s64) = COPY $sgpr0_sgpr1
+  ; WAVE32: bb.1:
+  ; WAVE32: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; WAVE32: S_NOP 0
   ; WAVE32: SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE32: G_BR %bb.2
+  ; WAVE32: bb.2:
+  ; WAVE32: S_NOP 0
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s64) = COPY $sgpr0_sgpr1
+
+  bb.1:
+    successors: %bb.1, %bb.2
+    S_NOP 0
+    %3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
+    G_BRCOND %3, %bb.2
+    G_BR %bb.1
+
+  bb.2:
+    S_NOP 0
+...
+
+# This usage is backwards from how the intrinsic is supposed to be
+# used.
+---
+name: brcond_si_loop_brcond_back
+tracksRegLiveness: true
+body: |
+  ; WAVE64-LABEL: name: brcond_si_loop_brcond_back
+  ; WAVE64: bb.0:
+  ; WAVE64: successors: %bb.1(0x80000000)
+  ; WAVE64: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+  ; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; WAVE64: [[COPY2:%[0-9]+]]:sreg_64_xexec(s64) = COPY $sgpr0_sgpr1
+  ; WAVE64: bb.1:
+  ; WAVE64: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; WAVE64: S_NOP 0
+  ; WAVE64: SI_LOOP [[COPY2]](s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE64: G_BR %bb.1
+  ; WAVE64: bb.2:
+  ; WAVE64: S_NOP 0
+  ; WAVE32-LABEL: name: brcond_si_loop_brcond_back
+  ; WAVE32: bb.0:
+  ; WAVE32: successors: %bb.1(0x80000000)
+  ; WAVE32: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+  ; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; WAVE32: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(s64) = COPY $sgpr0_sgpr1
   ; WAVE32: bb.1:
+  ; WAVE32: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; WAVE32: S_NOP 0
+  ; WAVE32: SI_LOOP [[COPY2]](s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE32: G_BR %bb.1
+  ; WAVE32: bb.2:
+  ; WAVE32: S_NOP 0
   bb.0:
-    successors: %bb.1
     liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s64) = COPY $sgpr0_sgpr1
+
+  bb.1:
+    successors: %bb.1, %bb.2
+    S_NOP 0
     %3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
     G_BRCOND %3, %bb.1
+    G_BR %bb.2
+
+  bb.2:
+    S_NOP 0
+...
+
+# This usage is backwards from how the intrinsic is supposed to be
+# used.
+---
+name: brcond_si_loop_brcond_back_fallthrough
+tracksRegLiveness: true
+body: |
+  ; WAVE64-LABEL: name: brcond_si_loop_brcond_back_fallthrough
+  ; WAVE64: bb.0:
+  ; WAVE64: successors: %bb.1(0x80000000)
+  ; WAVE64: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+  ; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; WAVE64: [[COPY2:%[0-9]+]]:sreg_64_xexec(s64) = COPY $sgpr0_sgpr1
+  ; WAVE64: bb.1:
+  ; WAVE64: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; WAVE64: S_NOP 0
+  ; WAVE64: SI_LOOP [[COPY2]](s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE64: G_BR %bb.1
+  ; WAVE64: bb.2:
+  ; WAVE32-LABEL: name: brcond_si_loop_brcond_back_fallthrough
+  ; WAVE32: bb.0:
+  ; WAVE32: successors: %bb.1(0x80000000)
+  ; WAVE32: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+  ; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; WAVE32: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(s64) = COPY $sgpr0_sgpr1
+  ; WAVE32: bb.1:
+  ; WAVE32: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; WAVE32: S_NOP 0
+  ; WAVE32: SI_LOOP [[COPY2]](s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE32: G_BR %bb.1
+  ; WAVE32: bb.2:
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s64) = COPY $sgpr0_sgpr1
 
   bb.1:
+    successors: %bb.1, %bb.2
+    S_NOP 0
+    %3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
+    G_BRCOND %3, %bb.1
+
+  bb.2:
 ...
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
@@ -164,7 +164,7 @@
 ; GFX9-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
 ; GFX9-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; GFX9-NEXT:    s_xor_b64 s[4:5], exec, s[6:7]
-; GFX9-NEXT:    s_cbranch_execnz BB2_2
+; GFX9-NEXT:    s_cbranch_execz BB2_2
 ; GFX9-NEXT:  ; %bb.1: ; %bb1
 ; GFX9-NEXT:    s_getpc_b64 s[6:7]
 ; GFX9-NEXT:    s_add_u32 s6, s6, static.gv2@rel32@lo+4
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
@@ -16,7 +16,7 @@
 ; CHECK-NEXT:    ; implicit-def: $vgpr4_vgpr5
 ; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CHECK-NEXT:    s_cbranch_execnz BB0_2
+; CHECK-NEXT:    s_cbranch_execz BB0_2
 ; CHECK-NEXT:  ; %bb.1:
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v4, v2
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v5, v3
@@ -646,7 +646,7 @@
 ; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CGP-NEXT:    s_cbranch_execnz BB2_2
+; CGP-NEXT:    s_cbranch_execz BB2_2
 ; CGP-NEXT:  ; %bb.1:
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v4
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v5
@@ -814,7 +814,7 @@
 ; CGP-NEXT:    ; implicit-def: $vgpr4_vgpr5
 ; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CGP-NEXT:    s_cbranch_execnz BB2_6
+; CGP-NEXT:    s_cbranch_execz BB2_6
 ; CGP-NEXT:  ; %bb.5:
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v6
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v7
@@ -2318,7 +2318,7 @@
 ; CHECK-NEXT:    ; implicit-def: $vgpr2_vgpr3
 ; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CHECK-NEXT:    s_cbranch_execnz BB7_2
+; CHECK-NEXT:    s_cbranch_execz BB7_2
 ; CHECK-NEXT:  ; %bb.1:
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, v4
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v5
@@ -2767,7 +2767,7 @@
 ; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CGP-NEXT:    s_cbranch_execnz BB8_2
+; CGP-NEXT:    s_cbranch_execz BB8_2
 ; CGP-NEXT:  ; %bb.1:
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v10
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v11
@@ -2935,7 +2935,7 @@
 ; CGP-NEXT:    ; implicit-def: $vgpr4_vgpr5
 ; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CGP-NEXT:    s_cbranch_execnz BB8_6
+; CGP-NEXT:    s_cbranch_execz BB8_6
 ; CGP-NEXT:  ; %bb.5:
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v8
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v9
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -16,7 +16,7 @@
 ; CHECK-NEXT:    ; implicit-def: $vgpr4_vgpr5
 ; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CHECK-NEXT:    s_cbranch_execnz BB0_2
+; CHECK-NEXT:    s_cbranch_execz BB0_2
 ; CHECK-NEXT:  ; %bb.1:
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v4, v2
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v5, v3
@@ -642,7 +642,7 @@
 ; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CGP-NEXT:    s_cbranch_execnz BB2_2
+; CGP-NEXT:    s_cbranch_execz BB2_2
 ; CGP-NEXT:  ; %bb.1:
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v4
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v5
@@ -809,7 +809,7 @@
 ; CGP-NEXT:    ; implicit-def: $vgpr4_vgpr5
 ; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CGP-NEXT:    s_cbranch_execnz BB2_6
+; CGP-NEXT:    s_cbranch_execz BB2_6
 ; CGP-NEXT:  ; %bb.5:
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v6
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v7
@@ -2292,7 +2292,7 @@
 ; CHECK-NEXT:    ; implicit-def: $vgpr2_vgpr3
 ; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CHECK-NEXT:    s_cbranch_execnz BB7_2
+; CHECK-NEXT:    s_cbranch_execz BB7_2
 ; CHECK-NEXT:  ; %bb.1:
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, v4
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v5
@@ -2738,7 +2738,7 @@
 ; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CGP-NEXT:    s_cbranch_execnz BB8_2
+; CGP-NEXT:    s_cbranch_execz BB8_2
 ; CGP-NEXT:  ; %bb.1:
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v10
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v11
@@ -2905,7 +2905,7 @@
 ; CGP-NEXT:    ; implicit-def: $vgpr4_vgpr5
 ; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CGP-NEXT:    s_cbranch_execnz BB8_6
+; CGP-NEXT:    s_cbranch_execz BB8_6
 ; CGP-NEXT:  ; %bb.5:
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v8
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v9
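
The s_cbranch_execnz to s_cbranch_execz flips in the check lines above all fall out of the same target swap: SI_IF now names the block to branch to when no lanes remain active, so its expansion in SILowerControlFlow can jump over the guarded block with an execz branch. Schematically (a sketch, not copied from any single test above):

    s_and_saveexec_b64 s[4:5], vcc
    s_cbranch_execz    BB0_2     ; skip the guarded block when exec == 0
  ; %bb.1:                       ; guarded block, only active lanes execute
    ...
  BB0_2:                         ; rejoin point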