Index: lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1407,26 +1407,12 @@
     return;
   }
 
-  // The result of VOPC instructions is or'd against ~EXEC before it is
-  // written to vcc or another SGPR. This means that the value '1' is always
-  // written to the corresponding bit for results that are masked. In order
-  // to correctly check against vccz, we need to and VCC with the EXEC
-  // register in order to clear the value from the masked bits.
-
   SDLoc SL(N);
 
-  SDNode *MaskedCond =
-      CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
-                             CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
-                             Cond);
-  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC,
-                                     SDValue(MaskedCond, 0),
-                                     SDValue()); // Passing SDValue() adds a
-                                                 // glue output.
+  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC, Cond);
   CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
                        N->getOperand(2), // Basic Block
-                       VCC.getValue(0),  // Chain
-                       VCC.getValue(1)); // Glue
+                       VCC.getValue(0));
   return;
 }
 
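With the exec mask gone, the branch condition is copied straight into vcc, and
since there is no intermediate S_AND_B64 machine node the copy no longer needs
a glue output; the branch only consumes the chain. For reference, a sketch of
how the tail of SelectBRCOND reads after this patch, reconstructed purely from
the hunk above (the enclosing function and its earlier cases are elided):

    SDLoc SL(N);

    // Copy the i1 condition directly into vcc. Without the S_AND_B64
    // against exec there is no glue value to thread into the branch.
    SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC, Cond);
    CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
                         N->getOperand(2),  // Basic Block
                         VCC.getValue(0));  // Chain
    return;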
Index: test/CodeGen/AMDGPU/branch-relaxation.ll
===================================================================
--- test/CodeGen/AMDGPU/branch-relaxation.ll
+++ test/CodeGen/AMDGPU/branch-relaxation.ll
@@ -89,10 +89,9 @@
 
 ; GCN-LABEL: {{^}}uniform_conditional_min_long_forward_vcnd_branch:
 ; GCN: s_load_dword [[CND:s[0-9]+]]
-; GCN-DAG: v_cmp_eq_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[CND]], 0
 ; GCN-DAG: v_mov_b32_e32 [[V_CND:v[0-9]+]], [[CND]]
-; GCN: s_and_b64 vcc, exec, [[CMP]]
-; GCN-NEXT: s_cbranch_vccz [[LONGBB:BB[0-9]+_[0-9]+]]
+; GCN-DAG: v_cmp_eq_f32_e64 vcc, [[CND]], 0
+; GCN: s_cbranch_vccz [[LONGBB:BB[0-9]+_[0-9]+]]
 
 ; GCN-NEXT: [[LONG_JUMP:BB[0-9]+_[0-9]+]]: ; %bb0
 ; GCN-NEXT: s_getpc_b64 vcc
@@ -434,7 +433,7 @@
 ; GCN: v_nop_e64
 ; GCN: v_nop_e64
 ; GCN: ;;#ASMEND
-; GCN-NEXT: s_and_b64 vcc, exec, -1{{$}}
+; GCN-NEXT: s_mov_b64 vcc, -1{{$}}
 ; GCN-NEXT: s_cbranch_vccz [[RET]]
 
 ; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %loop_body
@@ -478,14 +477,13 @@
 ; GCN-LABEL: {{^}}long_branch_hang:
 ; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 6
 ; GCN-NEXT: s_cbranch_scc1 [[LONG_BR_0:BB[0-9]+_[0-9]+]]
-
-; GCN: s_add_u32 vcc_lo, vcc_lo, [[LONG_BR_DEST0:BB[0-9]+_[0-9]+]]-(
-; GCN: s_setpc_b64
+; GCN-NEXT: s_branch [[SHORTB:BB[0-9]+_[0-9]+]]
 
 ; GCN-NEXT: [[LONG_BR_0]]:
+; GCN: s_add_u32 vcc_lo, vcc_lo, [[LONG_BR_DEST0:BB[0-9]+_[0-9]+]]-(
 ; GCN: s_setpc_b64
 
-; GCN-NEXT: [[LONG_BR_DEST0]]:
+; GCN: [[SHORTB]]:
 ; GCN-DAG: v_cmp_lt_i32
 ; GCN-DAG: v_cmp_gt_i32
 ; GCN: s_cbranch_vccnz
@@ -493,6 +491,7 @@
 
 ; GCN: s_setpc_b64
 ; GCN: s_setpc_b64
+; GCN: [[LONG_BR_DEST0]]
 ; GCN: s_cmp_eq_u32
 ; GCN-NEXT: s_cbranch_scc0
 ; GCN: s_setpc_b64
Index: test/CodeGen/AMDGPU/cf-loop-on-constant.ll
===================================================================
--- test/CodeGen/AMDGPU/cf-loop-on-constant.ll
+++ test/CodeGen/AMDGPU/cf-loop-on-constant.ll
@@ -97,7 +97,6 @@
 
 ; GCN: v_and_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
 ; GCN: v_cmp_eq_u32_e32 vcc, 1,
-; GCN: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, exec, vcc
 ; GCN: [[LOOPBB:BB[0-9]+_[0-9]+]]
 ; GCN: s_cbranch_vccnz [[LOOPBB]]
 ; GCN-NEXT: ; BB#2
Index: test/CodeGen/AMDGPU/cgp-bitfield-extract.ll
===================================================================
--- test/CodeGen/AMDGPU/cgp-bitfield-extract.ll
+++ test/CodeGen/AMDGPU/cgp-bitfield-extract.ll
@@ -170,13 +170,12 @@
 ; OPT: ret
 
 ; GCN-LABEL: {{^}}sink_ubfe_i64_span_midpoint:
+; GCN: s_lshr_b64 s{{\[}}[[LO:[0-9]+]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, 30
 ; GCN: s_cbranch_vccnz BB3_2
-; GCN: s_lshr_b64 s{{\[}}[[LO:[0-9]+]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, 30
 ; GCN: s_and_b32 s{{[0-9]+}}, s[[LO]], 0xff
 
 ; GCN: BB3_2:
-; GCN: s_lshr_b64 s{{\[}}[[LO:[0-9]+]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, 30
 ; GCN: s_and_b32 s{{[0-9]+}}, s[[LO]], 0x7f
 
 ; GCN: BB3_3:
 
Index: test/CodeGen/AMDGPU/i1-copy-implicit-def.ll
===================================================================
--- test/CodeGen/AMDGPU/i1-copy-implicit-def.ll
+++ test/CodeGen/AMDGPU/i1-copy-implicit-def.ll
@@ -1,10 +1,9 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
 ; SILowerI1Copies was not handling IMPLICIT_DEF
 ; SI-LABEL: {{^}}br_implicit_def:
 ; SI: BB#0:
-; SI-NEXT: s_and_b64 vcc, exec
 ; SI-NEXT: s_cbranch_vccnz
 define void @br_implicit_def(i32 addrspace(1)* %out, i32 %arg) #0 {
 bb:
Index: test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll
===================================================================
--- test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll
+++ test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll
@@ -37,7 +37,6 @@
 ; OPT-NOT: call i1 @llvm.amdgcn.loop
 
 ; GCN-LABEL: {{^}}annotate_ret_noloop:
-; GCN: s_and_b64 vcc
 ; GCN: s_cbranch_vccnz
 ; GCN: s_endpgm
 ; GCN: .Lfunc_end1
Index: test/CodeGen/AMDGPU/skip-if-dead.ll
===================================================================
--- test/CodeGen/AMDGPU/skip-if-dead.ll
+++ test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -262,13 +262,11 @@
 ; CHECK-NEXT: s_endpgm
 
 ; CHECK: [[KILLBB:BB[0-9]+_[0-9]+]]:
-; CHECK: s_and_b64 vcc, exec,
 ; CHECK-NEXT: s_cbranch_vccz [[PHIBB:BB[0-9]+_[0-9]+]]
 
 ; CHECK: [[PHIBB]]:
 ; CHECK: v_cmp_eq_f32_e32 vcc, 0, [[PHIREG]]
-; CHECK: s_and_b64 vcc, exec, vcc
-; CHECK: s_cbranch_vccz [[ENDBB:BB[0-9]+_[0-9]+]]
+; CHECK-NEXT: s_cbranch_vccz [[ENDBB:BB[0-9]+_[0-9]+]]
 
 ; CHECK: ; %bb10
 ; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 9
@@ -303,16 +301,14 @@
 
 ; CHECK-LABEL: {{^}}no_skip_no_successors:
 ; CHECK: v_cmp_nge_f32
-; CHECK: s_and_b64 vcc, exec,
-; CHECK: s_cbranch_vccz [[SKIPKILL:BB[0-9]+_[0-9]+]]
+; CHECK-NEXT: s_cbranch_vccz [[SKIPKILL:BB[0-9]+_[0-9]+]]
 
 ; CHECK: ; %bb6
 ; CHECK: s_mov_b64 exec, 0
 
 ; CHECK: [[SKIPKILL]]:
 ; CHECK: v_cmp_nge_f32
-; CHECK: s_and_b64 vcc, exec, vcc
-; CHECK: s_cbranch_vccz [[UNREACHABLE:BB[0-9]+_[0-9]+]]
+; CHECK-NEXT: s_cbranch_vccz [[UNREACHABLE:BB[0-9]+_[0-9]+]]
 
 ; CHECK: [[UNREACHABLE]]:
 ; CHECK-NEXT: .Lfunc_end{{[0-9]+}}
Index: test/CodeGen/AMDGPU/smrd-vccz-bug.ll
===================================================================
--- test/CodeGen/AMDGPU/smrd-vccz-bug.ll
+++ test/CodeGen/AMDGPU/smrd-vccz-bug.ll
@@ -4,8 +4,7 @@
 
 ; GCN-FUNC: {{^}}vccz_workaround:
 ; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x0
-; GCN: v_cmp_neq_f32_e64 [[MASK:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
-; GCN: s_and_b64 vcc, exec, [[MASK]]
+; GCN: v_cmp_neq_f32_e64 vcc, s{{[0-9]+}}, 0{{$}}
 ; GCN: s_waitcnt lgkmcnt(0)
 ; VCCZ-BUG: s_mov_b64 vcc, vcc
 ; NOVCCZ-BUG-NOT: s_mov_b64 vcc, vcc
@@ -29,7 +28,6 @@
 
 ; GCN-FUNC: {{^}}vccz_noworkaround:
 ; GCN: v_cmp_neq_f32_e32 vcc, 0, v{{[0-9]+}}
-; GCN: s_and_b64 vcc, exec, vcc
 ; GCN: s_cbranch_vccnz [[EXIT:[0-9A-Za-z_]+]]
 ; GCN: buffer_store_dword
 ; GCN: [[EXIT]]:
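One subtlety in smrd-vccz-bug.ll: the compare now writes vcc directly, but on
affected subtargets the VCCZ-BUG checks still expect the hardware workaround,
namely that vcc is rewritten to itself after the s_waitcnt so vccz is valid
before the branch, while NOVCCZ-BUG-NOT verifies it stays absent elsewhere. A
sketch of the expected sequence (register numbers and the label are
illustrative placeholders, not values from the test):

    s_load_dword s0, s[4:5], 0x0
    v_cmp_neq_f32_e64 vcc, s2, 0
    s_waitcnt lgkmcnt(0)
    s_mov_b64 vcc, vcc        ; VCCZ-BUG only: force vccz to be recomputed
    s_cbranch_vccnz BB0_2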
Index: test/CodeGen/AMDGPU/uniform-cfg.ll
===================================================================
--- test/CodeGen/AMDGPU/uniform-cfg.ll
+++ test/CodeGen/AMDGPU/uniform-cfg.ll
@@ -32,7 +32,6 @@
 ; FIXME: We could use _e32 here if we re-used the 0 from [[STORE_VAL]], and
 ; also scheduled the write first.
 ; GCN-DAG: v_cmp_eq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
-; GCN-DAG: s_and_b64 vcc, exec, [[COND]]
 ; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
 ; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
 
@@ -89,7 +88,6 @@
 ; FIXME: We could use _e32 here if we re-used the 0 from [[STORE_VAL]], and
 ; also scheduled the write first.
 ; GCN-DAG: v_cmp_neq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
-; GCN-DAG: s_and_b64 vcc, exec, [[COND]]
 ; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
 ; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
 
@@ -253,8 +251,7 @@
 ; GCN: s_load_dword [[COND:s[0-9]+]]
 ; GCN: s_cmp_lt_i32 [[COND]], 1
 ; GCN: s_cbranch_scc1 [[EXIT:[A-Za-z0-9_]+]]
-; GCN: v_cmp_gt_i32_e64 [[MASK:s\[[0-9]+:[0-9]+\]]], [[COND]], 0{{$}}
-; GCN: s_and_b64 vcc, exec, [[MASK]]
+; GCN: v_cmp_gt_i32_e64 vcc, [[COND]], 0{{$}}
 ; GCN: s_cbranch_vccnz [[EXIT]]
 ; GCN: buffer_store
 ; GCN: {{^}}[[EXIT]]:
@@ -439,7 +436,6 @@
 ; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
 
 ; SI: v_cmp_eq_u64_e64
-; SI: s_and_b64 vcc, exec,
 ; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
 ; VI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]
 
@@ -471,7 +467,6 @@
 ; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
 
 ; SI: v_cmp_ne_u64_e64
-; SI: s_and_b64 vcc, exec,
 ; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
 ; VI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]
 
@@ -500,7 +495,6 @@
 
 ; GCN-LABEL: {{^}}uniform_if_scc_i64_sgt:
 ; GCN: v_cmp_gt_i64_e64
-; GCN: s_and_b64 vcc, exec,
 ; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
 
 ; Fall-through to the else
Index: test/CodeGen/AMDGPU/wqm.ll
===================================================================
--- test/CodeGen/AMDGPU/wqm.ll
+++ test/CodeGen/AMDGPU/wqm.ll
@@ -355,12 +355,11 @@
 ; CHECK: s_wqm_b64 exec, exec
 ; CHECK-DAG: v_mov_b32_e32 [[CTR:v[0-9]+]], 0
 ; CHECK-DAG: v_mov_b32_e32 [[SEVEN:v[0-9]+]], 0x40e00000
-; CHECK: s_branch [[LOOPHDR:BB[0-9]+_[0-9]+]]
+; CHECK: [[LOOPHDR:BB[0-9]+_[0-9]+]]: ; %body
 
 ; CHECK: v_add_f32_e32 [[CTR]], 2.0, [[CTR]]
-; CHECK: [[LOOPHDR]]: ; %loop
 ; CHECK: v_cmp_lt_f32_e32 vcc, [[SEVEN]], [[CTR]]
-; CHECK: s_cbranch_vccz
+; CHECK: s_cbranch_vccz [[LOOPHDR]]
 
 ; CHECK: ; %break
 ; CHECK: ; return
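Taken together, the test updates encode the same before/after shape for a
uniform branch on a VOPC compare. Schematically (register numbers and the
block label are illustrative placeholders, not values from any one test):

    ; before: compare into an SGPR pair, then mask with exec
    v_cmp_eq_f32_e64 s[0:1], s2, 0
    s_and_b64 vcc, exec, s[0:1]
    s_cbranch_vccz BB0_2

    ; after: compare straight into vcc and branch on it directly
    v_cmp_eq_f32_e64 vcc, s2, 0
    s_cbranch_vccz BB0_2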