// Patch summary: handle a conditional branch whose condition is undef via a
// dedicated SI_BR_UNDEF pseudo instruction in the AMDGPU backend.
//
//  * AMDGPUISelDAGToDAG.cpp (SelectBRCOND): before the existing
//    isCBranchSCC() check, test Cond.isUndef(); if true, select the node to
//    AMDGPU::SI_BR_UNDEF (result type MVT::Other) with N->getOperand(2) and
//    N->getOperand(0) as operands, then return.
//    NOTE(review): operand 2 / operand 0 are presumably the branch target and
//    the chain -- confirm against the BRCOND node layout.
//  * SIISelLowering.cpp (custom inserter switch): the new SI_BR_UNDEF case
//    builds an S_CBRANCH_SCC1 carrying the pseudo's operand 0, marks operand 1
//    of the new branch as undef (annotated in the patch as "read undef SCC"),
//    erases the pseudo and returns BB.
//  * SIInstructions.td: defines SI_BR_UNDEF as an SPseudoInstSI with no
//    outputs and one sopp_brtarget:$simm16 input, with isTerminator = 1 and
//    usesCustomInserter = 1.
//  * Tests: the touched CHECK lines change from s_cbranch_vccnz /
//    s_cbranch_vccz to s_cbranch_scc1 / s_cbranch_scc0. In
//    sink_ubfe_i64_span_midpoint the s_lshr_b64 check now follows the branch
//    and a second s_lshr_b64 check is added under BB3_2.
//
// NOTE(review): the hunks below run together on a few long lines, so the
// original newlines (including blank context lines counted in the @@ headers)
// appear to have been lost. Restore them from the original review before
// feeding this to patch -- TODO confirm.
Index: lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1401,6 +1401,12 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) { SDValue Cond = N->getOperand(1); + if (Cond.isUndef()) { + CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other, + N->getOperand(2), N->getOperand(0)); + return; + } + if (isCBranchSCC(N)) { // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it. SelectCode(N); Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -1742,6 +1742,15 @@ MI.eraseFromParent(); return BB; } + case AMDGPU::SI_BR_UNDEF: { + const SIInstrInfo *TII = getSubtarget()->getInstrInfo(); + const DebugLoc &DL = MI.getDebugLoc(); + MachineInstr *Br = BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_CBRANCH_SCC1)) + .addOperand(MI.getOperand(0)); + Br->getOperand(1).setIsUndef(true); // read undef SCC + MI.eraseFromParent(); + return BB; + } default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); } Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -247,6 +247,12 @@ } // End Uses = [EXEC], Defs = [EXEC,VCC] +// Branch on undef scc. Used to avoid intermediate copy from +// IMPLICIT_DEF to SCC. 
+def SI_BR_UNDEF : SPseudoInstSI <(outs), (ins sopp_brtarget:$simm16)> { + let isTerminator = 1; + let usesCustomInserter = 1; +} def SI_PS_LIVE : PseudoInstSI < (outs SReg_64:$dst), (ins), Index: test/CodeGen/AMDGPU/cgp-bitfield-extract.ll =================================================================== --- test/CodeGen/AMDGPU/cgp-bitfield-extract.ll +++ test/CodeGen/AMDGPU/cgp-bitfield-extract.ll @@ -27,7 +27,7 @@ ; GCN-LABEL: {{^}}sink_ubfe_i32: ; GCN-NOT: lshr -; GCN: s_cbranch_vccnz +; GCN: s_cbranch_scc1 ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008 ; GCN: BB0_2: @@ -121,7 +121,7 @@ ; GCN-LABEL: {{^}}sink_ubfe_i16: ; GCN-NOT: lshr ; VI: s_bfe_u32 s0, s0, 0xc0004 -; GCN: s_cbranch_vccnz +; GCN: s_cbranch_scc1 ; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80004 ; VI: s_and_b32 s0, s0, 0xff @@ -175,12 +175,13 @@ ; OPT: ret ; GCN-LABEL: {{^}}sink_ubfe_i64_span_midpoint: -; GCN: s_lshr_b64 s{{\[}}[[LO:[0-9]+]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, 30 -; GCN: s_cbranch_vccnz BB3_2 +; GCN: s_cbranch_scc1 BB3_2 +; GCN: s_lshr_b64 s{{\[}}[[LO:[0-9]+]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, 30 ; GCN: s_and_b32 s{{[0-9]+}}, s[[LO]], 0xff ; GCN: BB3_2: +; GCN: s_lshr_b64 s{{\[}}[[LO:[0-9]+]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, 30 ; GCN: s_and_b32 s{{[0-9]+}}, s[[LO]], 0x7f ; GCN: BB3_3: @@ -225,7 +226,7 @@ ; GCN-LABEL: {{^}}sink_ubfe_i64_low32: -; GCN: s_cbranch_vccnz BB4_2 +; GCN: s_cbranch_scc1 BB4_2 ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x8000f @@ -273,7 +274,7 @@ ; OPT: ret ; GCN-LABEL: {{^}}sink_ubfe_i64_high32: -; GCN: s_cbranch_vccnz BB5_2 +; GCN: s_cbranch_scc1 BB5_2 ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80003 ; GCN: BB5_2: Index: test/CodeGen/AMDGPU/i1-copy-implicit-def.ll =================================================================== --- test/CodeGen/AMDGPU/i1-copy-implicit-def.ll +++ test/CodeGen/AMDGPU/i1-copy-implicit-def.ll @@ -4,7 +4,7 @@ ; SILowerI1Copies was not handling IMPLICIT_DEF ; SI-LABEL: {{^}}br_implicit_def: ; SI: BB#0: -; 
SI-NEXT: s_cbranch_vccnz +; SI-NEXT: s_cbranch_scc1 define void @br_implicit_def(i32 addrspace(1)* %out, i32 %arg) #0 { bb: br i1 undef, label %bb1, label %bb2 Index: test/CodeGen/AMDGPU/ret_jump.ll =================================================================== --- test/CodeGen/AMDGPU/ret_jump.ll +++ test/CodeGen/AMDGPU/ret_jump.ll @@ -5,7 +5,7 @@ ; Mask should be in original state after executed unreachable block ; GCN-LABEL: {{^}}main: -; GCN: s_cbranch_vccnz [[RET_BB:BB[0-9]+_[0-9]+]] +; GCN: s_cbranch_scc1 [[RET_BB:BB[0-9]+_[0-9]+]] ; GCN: s_and_saveexec_b64 [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc ; GCN-NEXT: s_xor_b64 [[XOR_EXEC:s\[[0-9]+:[0-9]+\]]], exec, [[SAVE_EXEC]] Index: test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll =================================================================== --- test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll +++ test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll @@ -37,7 +37,7 @@ ; OPT-NOT: call i1 @llvm.amdgcn.loop ; GCN-LABEL: {{^}}annotate_ret_noloop: -; GCN: s_cbranch_vccnz +; GCN: s_cbranch_scc1 ; GCN: s_endpgm ; GCN: .Lfunc_end1 define void @annotate_ret_noloop(<4 x float> addrspace(1)* noalias nocapture readonly %arg) #0 { Index: test/CodeGen/AMDGPU/skip-if-dead.ll =================================================================== --- test/CodeGen/AMDGPU/skip-if-dead.ll +++ test/CodeGen/AMDGPU/skip-if-dead.ll @@ -263,7 +263,7 @@ ; CHECK-NEXT: s_endpgm ; CHECK: [[KILLBB:BB[0-9]+_[0-9]+]]: -; CHECK-NEXT: s_cbranch_vccz [[PHIBB:BB[0-9]+_[0-9]+]] +; CHECK-NEXT: s_cbranch_scc0 [[PHIBB:BB[0-9]+_[0-9]+]] ; CHECK: [[PHIBB]]: ; CHECK: v_cmp_eq_f32_e32 vcc, 0, [[PHIREG]] Index: test/CodeGen/AMDGPU/unhandled-loop-condition-assertion.ll =================================================================== --- test/CodeGen/AMDGPU/unhandled-loop-condition-assertion.ll +++ test/CodeGen/AMDGPU/unhandled-loop-condition-assertion.ll @@ -40,7 +40,7 @@ ; COMMON-LABEL: {{^}}branch_false: ; SI: s_cbranch_vccnz -; SI: s_cbranch_vccnz +; SI: 
s_cbranch_scc1 ; SI: s_endpgm define void @branch_false(i8 addrspace(1)* nocapture %main, i32 %main_stride) #0 { entry: @@ -76,8 +76,8 @@ } ; COMMON-LABEL: {{^}}branch_undef: -; SI: s_cbranch_vccnz -; SI: s_cbranch_vccnz +; SI: s_cbranch_scc1 +; SI: s_cbranch_scc1 ; SI: s_endpgm define void @branch_undef(i8 addrspace(1)* nocapture %main, i32 %main_stride) #0 { entry: