diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp --- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp +++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp @@ -74,6 +74,15 @@ // We end up with this pattern sometimes after basic block placement. // It happens while combining a block which assigns -1 or 0 to a saved mask // and another block which consumes that saved mask and then a branch. + // + // While searching this also performs the following substitution: + // vcc = V_CMP + // vcc = S_AND exec, vcc + // S_CBRANCH_VCC[N]Z + // => + // vcc = V_CMP + // S_CBRANCH_VCC[N]Z + bool Changed = false; MachineBasicBlock &MBB = *MI.getParent(); const GCNSubtarget &ST = MBB.getParent()->getSubtarget(); @@ -121,14 +130,26 @@ SReg = Op2.getReg(); auto M = std::next(A); bool ReadsSreg = false; + bool ModifiesExec = false; for (; M != E; ++M) { if (M->definesRegister(SReg, TRI)) break; if (M->modifiesRegister(SReg, TRI)) return Changed; ReadsSreg |= M->readsRegister(SReg, TRI); + ModifiesExec |= M->modifiesRegister(ExecReg, TRI); + } + if (M == E) + return Changed; + // If SReg is VCC, its definition is a VALU comparison, and EXEC is not + // modified in between, then the S_AND with EXEC is not required. + // Erase the S_AND and return. 
+ if (A->getOpcode() == And && SReg == CondReg && !ModifiesExec && + (M->isCompare() || TII->isVOPC(*M)) && TII->isVALU(*M)) { + A->eraseFromParent(); + return true; } - if (M == E || !M->isMoveImmediate() || !M->getOperand(1).isImm() || + if (!M->isMoveImmediate() || !M->getOperand(1).isImm() || (M->getOperand(1).getImm() != -1 && M->getOperand(1).getImm() != 0)) return Changed; MaskValue = M->getOperand(1).getImm(); diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll --- a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll @@ -623,7 +623,6 @@ ; GFX908-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[12:13] ; GFX908-NEXT: v_add_co_u32_e64 v14, s[2:3], v14, v6 ; GFX908-NEXT: v_addc_co_u32_e64 v15, s[2:3], v15, v7, s[2:3] -; GFX908-NEXT: s_and_b64 vcc, exec, vcc ; GFX908-NEXT: s_cbranch_vccz .LBB3_1 ; GFX908-NEXT: .LBB3_5: ; %bb16 ; GFX908-NEXT: ; Parent Loop BB3_2 Depth=1 @@ -751,7 +750,6 @@ ; GFX90A-NEXT: v_add_co_u32_e32 v16, vcc, v16, v10 ; GFX90A-NEXT: v_addc_co_u32_e32 v17, vcc, v17, v11, vcc ; GFX90A-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[14:15] -; GFX90A-NEXT: s_and_b64 vcc, exec, vcc ; GFX90A-NEXT: s_cbranch_vccz .LBB3_1 ; GFX90A-NEXT: .LBB3_5: ; %bb16 ; GFX90A-NEXT: ; Parent Loop BB3_2 Depth=1 diff --git a/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll b/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll --- a/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll +++ b/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll @@ -82,7 +82,6 @@ ; GFX7-NEXT: s_cmp_lg_u32 s0, 0 ; GFX7-NEXT: s_addc_u32 s0, s2, 0 ; GFX7-NEXT: v_cmp_ge_u32_e32 vcc, s0, v0 -; GFX7-NEXT: s_and_b64 vcc, exec, vcc ; GFX7-NEXT: s_cbranch_vccnz .LBB1_2 ; GFX7-NEXT: ; %bb.1: ; %bb0 ; GFX7-NEXT: v_mov_b32_e32 v0, 0 @@ -109,7 +108,6 @@ ; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] ; GFX9-NEXT: s_addc_u32 s0, s2, 0 ; 
GFX9-NEXT: v_cmp_ge_u32_e32 vcc, s0, v0 -; GFX9-NEXT: s_and_b64 vcc, exec, vcc ; GFX9-NEXT: s_cbranch_vccnz .LBB1_2 ; GFX9-NEXT: ; %bb.1: ; %bb0 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 @@ -136,7 +134,6 @@ ; GFX10-NEXT: s_cmpk_lg_u32 s1, 0x0 ; GFX10-NEXT: s_addc_u32 s0, s0, 0 ; GFX10-NEXT: v_cmp_ge_u32_e32 vcc_lo, s0, v0 -; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo ; GFX10-NEXT: s_cbranch_vccnz .LBB1_2 ; GFX10-NEXT: ; %bb.1: ; %bb0 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/global-load-saddr-to-vaddr.ll b/llvm/test/CodeGen/AMDGPU/global-load-saddr-to-vaddr.ll --- a/llvm/test/CodeGen/AMDGPU/global-load-saddr-to-vaddr.ll +++ b/llvm/test/CodeGen/AMDGPU/global-load-saddr-to-vaddr.ll @@ -71,7 +71,6 @@ ; GCN-NEXT: global_load_short_d16_hi v0, v[2:3], off glc ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s2, v0 -; GCN-NEXT: s_and_b64 vcc, exec, vcc ; GCN-NEXT: s_cbranch_vccz .LBB1_1 ; GCN-NEXT: ; %bb.2: ; %bb2 ; GCN-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/idiv-licm.ll b/llvm/test/CodeGen/AMDGPU/idiv-licm.ll --- a/llvm/test/CodeGen/AMDGPU/idiv-licm.ll +++ b/llvm/test/CodeGen/AMDGPU/idiv-licm.ll @@ -431,7 +431,6 @@ ; GFX9-NEXT: v_mad_f32 v0, -v0, v2, v8 ; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, s5, v4 ; GFX9-NEXT: v_cmp_ge_f32_e64 s[0:1], |v0|, v2 -; GFX9-NEXT: s_and_b64 vcc, exec, vcc ; GFX9-NEXT: v_addc_co_u32_e64 v0, s[0:1], 0, v7, s[0:1] ; GFX9-NEXT: global_store_short v[5:6], v0, off ; GFX9-NEXT: s_cbranch_vccz .LBB4_1 @@ -516,7 +515,6 @@ ; GFX9-NEXT: v_addc_co_u32_e64 v8, s[2:3], 0, v10, s[2:3] ; GFX9-NEXT: v_mul_lo_u32 v8, v8, s7 ; GFX9-NEXT: v_add_co_u32_e64 v5, s[0:1], s4, v5 -; GFX9-NEXT: s_and_b64 vcc, exec, vcc ; GFX9-NEXT: v_addc_co_u32_e64 v6, s[0:1], v7, v6, s[0:1] ; GFX9-NEXT: v_sub_u32_e32 v0, v0, v8 ; GFX9-NEXT: global_store_short v[5:6], v0, off @@ -552,7 +550,6 @@ ; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v8, vcc_lo ; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x400, v4 ; GFX10-NEXT: v_mul_lo_u32 v7, v7, 
s4 -; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo ; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v7 ; GFX10-NEXT: global_store_short v[5:6], v0, off ; GFX10-NEXT: s_cbranch_vccz .LBB5_1 @@ -608,7 +605,6 @@ ; GFX9-NEXT: v_cmp_ge_f32_e64 s[0:1], |v7|, |v2| ; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, s5, v4 ; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[0:1] -; GFX9-NEXT: s_and_b64 vcc, exec, vcc ; GFX9-NEXT: v_add_u32_e32 v0, v8, v0 ; GFX9-NEXT: global_store_short v[5:6], v0, off ; GFX9-NEXT: s_cbranch_vccz .LBB6_1 @@ -701,7 +697,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v8, s5 ; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, s7, v4 ; GFX9-NEXT: v_add_co_u32_e64 v5, s[0:1], s4, v5 -; GFX9-NEXT: s_and_b64 vcc, exec, vcc ; GFX9-NEXT: v_addc_co_u32_e64 v6, s[0:1], v8, v6, s[0:1] ; GFX9-NEXT: v_sub_u32_e32 v0, v7, v0 ; GFX9-NEXT: global_store_short v[5:6], v0, off @@ -741,7 +736,6 @@ ; GFX10-NEXT: v_add_co_u32 v5, s0, s2, v5 ; GFX10-NEXT: v_add_co_ci_u32_e64 v6, s0, s3, v6, s0 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, s1 -; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo ; GFX10-NEXT: v_sub_nc_u32_e32 v0, v7, v0 ; GFX10-NEXT: global_store_short v[5:6], v0, off ; GFX10-NEXT: s_cbranch_vccz .LBB7_1 diff --git a/llvm/test/CodeGen/AMDGPU/insert-skip-from-vcc.mir b/llvm/test/CodeGen/AMDGPU/insert-skip-from-vcc.mir --- a/llvm/test/CodeGen/AMDGPU/insert-skip-from-vcc.mir +++ b/llvm/test/CodeGen/AMDGPU/insert-skip-from-vcc.mir @@ -535,3 +535,119 @@ S_CBRANCH_VCCZ %bb.1, implicit $vcc S_ENDPGM 0 ... +--- +# GCN-LABEL: name: and_cmp_vccz +# GCN: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec +# GCN-NOT: S_AND_ +# GCN: S_CBRANCH_VCCZ %bb.1, implicit killed $vcc +name: and_cmp_vccz +body: | + bb.0: + S_NOP 0 + + bb.1: + S_NOP 0 + + bb.2: + V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec + $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc + S_CBRANCH_VCCZ %bb.1, implicit killed $vcc + S_ENDPGM 0 +... 
+--- +# GCN-LABEL: name: and_cmp_vccnz +# GCN: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec +# GCN-NOT: S_AND_ +# GCN: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc +name: and_cmp_vccnz +body: | + bb.0: + S_NOP 0 + + bb.1: + S_NOP 0 + + bb.2: + V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec + $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc + S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc + S_ENDPGM 0 +... +--- +# GCN-LABEL: name: andn2_cmp_vccz +# GCN: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec +# GCN: $vcc = S_ANDN2_B64 $exec, $vcc, implicit-def dead $scc +# GCN: S_CBRANCH_VCCZ %bb.1, implicit killed $vcc +name: andn2_cmp_vccz +body: | + bb.0: + S_NOP 0 + + bb.1: + S_NOP 0 + + bb.2: + V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec + $vcc = S_ANDN2_B64 $exec, $vcc, implicit-def dead $scc + S_CBRANCH_VCCZ %bb.1, implicit killed $vcc + S_ENDPGM 0 +... +--- +# GCN-LABEL: name: and_cmpclass_vccz +# GCN: V_CMP_CLASS_F32_e32 killed $sgpr0, killed $vgpr0, implicit-def $vcc, implicit $exec +# GCN-NOT: S_AND_ +# GCN: S_CBRANCH_VCCZ %bb.1, implicit killed $vcc +name: and_cmpclass_vccz +body: | + bb.0: + S_NOP 0 + + bb.1: + S_NOP 0 + + bb.2: + V_CMP_CLASS_F32_e32 killed $sgpr0, killed $vgpr0, implicit-def $vcc, implicit $exec + $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc + S_CBRANCH_VCCZ %bb.1, implicit killed $vcc + S_ENDPGM 0 +... +--- +# GCN-LABEL: name: and_cmpx_vccz +# GCN: V_CMPX_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit-def $exec, implicit $exec +# GCN-NOT: S_AND_ +# GCN: S_CBRANCH_VCCZ %bb.1, implicit killed $vcc +name: and_cmpx_vccz +body: | + bb.0: + S_NOP 0 + + bb.1: + S_NOP 0 + + bb.2: + V_CMPX_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit-def $exec, implicit $exec + $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc + S_CBRANCH_VCCZ %bb.1, implicit killed $vcc + S_ENDPGM 0 +... 
+--- +# GCN-LABEL: name: and_or_cmp_vccz +# GCN: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec +# GCN: $exec = S_OR_B64 $exec, $sgpr0_sgpr1, implicit-def dead $scc +# GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc +# GCN: S_CBRANCH_VCCZ %bb.1, implicit killed $vcc +name: and_or_cmp_vccz +body: | + bb.0: + S_NOP 0 + + bb.1: + S_NOP 0 + + bb.2: + V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec + $exec = S_OR_B64 $exec, $sgpr0_sgpr1, implicit-def dead $scc + $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc + S_CBRANCH_VCCZ %bb.1, implicit killed $vcc + S_ENDPGM 0 +... diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll --- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll +++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll @@ -195,7 +195,6 @@ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_mov_b64 s[6:7], -1 ; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 1, v1 -; GCN-NEXT: s_and_b64 vcc, exec, vcc ; GCN-NEXT: ; implicit-def: $sgpr8_sgpr9 ; GCN-NEXT: s_mov_b64 s[10:11], -1 ; GCN-NEXT: s_cbranch_vccnz .LBB1_6 @@ -203,7 +202,6 @@ ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 ; GCN-NEXT: s_mov_b64 s[6:7], -1 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 -; GCN-NEXT: s_and_b64 vcc, exec, vcc ; GCN-NEXT: s_mov_b64 s[8:9], -1 ; GCN-NEXT: s_cbranch_vccz .LBB1_5 ; GCN-NEXT: ; %bb.4: ; %case1 @@ -223,7 +221,6 @@ ; GCN-NEXT: ; %bb.7: ; %LeafBlock ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GCN-NEXT: s_and_b64 vcc, exec, vcc ; GCN-NEXT: s_mov_b64 s[8:9], -1 ; GCN-NEXT: s_cbranch_vccz .LBB1_1 ; GCN-NEXT: ; %bb.8: ; %case0 diff --git a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll --- a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll +++ b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll @@ -151,7 +151,6 @@ ; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc ; GCN-NEXT: s_waitcnt vmcnt(0) ; 
GCN-NEXT: v_cmp_lt_i32_e32 vcc, 8, v0 -; GCN-NEXT: s_and_b64 vcc, exec, vcc ; GCN-NEXT: s_cbranch_vccnz .LBB1_6 ; GCN-NEXT: ; %bb.1: ; %bb14.lr.ph ; GCN-NEXT: s_load_dword s4, s[0:1], 0x0 @@ -176,7 +175,6 @@ ; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 8, v0 -; GCN-NEXT: s_and_b64 vcc, exec, vcc ; GCN-NEXT: s_cbranch_vccnz .LBB1_4 ; GCN-NEXT: ; %bb.5: ; %bb21 ; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1 diff --git a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll --- a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll +++ b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll @@ -438,7 +438,6 @@ ; {{^}}sopc_vopc_legalize_bug: ; GCN: s_load_dword [[SGPR:s[0-9]+]] ; GCN: v_cmp_le_u32_e32 vcc, [[SGPR]], v{{[0-9]+}} -; GCN: s_and_b64 vcc, exec, vcc ; GCN: s_cbranch_vccnz [[EXIT:.L[A-Z0-9_]+]] ; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 ; GCN-NOHSA: buffer_store_dword [[ONE]] diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll --- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll @@ -217,7 +217,6 @@ ; GCN-IR-NEXT: s_addc_u32 s11, s11, 0 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[10:11], v[0:1] ; GCN-IR-NEXT: s_mov_b64 s[14:15], s[8:9] -; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_3 ; GCN-IR-NEXT: .LBB0_4: ; %Flow6 ; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[12:13], 1 @@ -1071,7 +1070,6 @@ ; GCN-IR-NEXT: s_addc_u32 s11, s11, 0 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[10:11], v[0:1] ; GCN-IR-NEXT: s_mov_b64 s[14:15], s[8:9] -; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc ; GCN-IR-NEXT: s_cbranch_vccz .LBB9_3 ; GCN-IR-NEXT: .LBB9_4: ; %Flow3 ; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[12:13], 1 @@ -1283,7 +1281,6 @@ ; GCN-IR-NEXT: s_addc_u32 s9, s9, 0 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[8:9], v[0:1] ; GCN-IR-NEXT: s_mov_b64 s[12:13], s[6:7] -; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_3 ; 
GCN-IR-NEXT: .LBB10_4: ; %Flow5 ; GCN-IR-NEXT: s_lshl_b64 s[2:3], s[10:11], 1 diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll --- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll +++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll @@ -750,7 +750,6 @@ ; SI-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; SI-NEXT: s_and_b64 vcc, exec, vcc ; SI-NEXT: s_cbranch_vccnz .LBB10_2 ; SI-NEXT: .LBB10_4: ; %Flow1 ; SI-NEXT: s_or_b64 exec, exec, s[4:5] @@ -796,7 +795,6 @@ ; GFX10-WAVE64-NEXT: global_load_dword v0, v[0:1], off glc dlc ; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0) ; GFX10-WAVE64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, vcc ; GFX10-WAVE64-NEXT: s_cbranch_vccnz .LBB10_1 ; GFX10-WAVE64-NEXT: .LBB10_3: ; %Flow1 ; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[2:3] @@ -840,7 +838,6 @@ ; GFX10-WAVE32-NEXT: global_load_dword v0, v[0:1], off glc dlc ; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0) ; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo ; GFX10-WAVE32-NEXT: s_cbranch_vccnz .LBB10_1 ; GFX10-WAVE32-NEXT: .LBB10_3: ; %Flow1 ; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s1 @@ -901,7 +898,6 @@ ; SI-NEXT: v_mov_b32_e32 v0, 4.0 ; SI-NEXT: .LBB11_3: ; %phibb ; SI-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0 -; SI-NEXT: s_and_b64 vcc, exec, vcc ; SI-NEXT: s_cbranch_vccz .LBB11_5 ; SI-NEXT: ; %bb.4: ; %bb10 ; SI-NEXT: s_mov_b32 s3, 0xf000 @@ -934,7 +930,6 @@ ; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WAVE64-NEXT: .LBB11_3: ; %phibb ; GFX10-WAVE64-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0 -; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, vcc ; GFX10-WAVE64-NEXT: s_cbranch_vccz .LBB11_5 ; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb10 ; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 9 @@ -965,7 +960,6 @@ ; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WAVE32-NEXT: .LBB11_3: ; %phibb ; GFX10-WAVE32-NEXT: v_cmp_eq_f32_e32 
vcc_lo, 0, v0 -; GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo ; GFX10-WAVE32-NEXT: s_cbranch_vccz .LBB11_5 ; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb10 ; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 9 diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll b/llvm/test/CodeGen/AMDGPU/srem64.ll --- a/llvm/test/CodeGen/AMDGPU/srem64.ll +++ b/llvm/test/CodeGen/AMDGPU/srem64.ll @@ -189,7 +189,6 @@ ; GCN-IR-NEXT: s_addc_u32 s9, s9, 0 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[8:9], v[0:1] ; GCN-IR-NEXT: s_mov_b64 s[12:13], s[6:7] -; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_3 ; GCN-IR-NEXT: .LBB0_4: ; %Flow6 ; GCN-IR-NEXT: s_lshl_b64 s[8:9], s[10:11], 1 @@ -1081,7 +1080,6 @@ ; GCN-IR-NEXT: s_addc_u32 s11, s11, 0 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[10:11], v[0:1] ; GCN-IR-NEXT: s_mov_b64 s[14:15], s[6:7] -; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc ; GCN-IR-NEXT: s_cbranch_vccz .LBB8_3 ; GCN-IR-NEXT: .LBB8_4: ; %Flow6 ; GCN-IR-NEXT: s_lshl_b64 s[10:11], s[12:13], 1 @@ -1243,7 +1241,6 @@ ; GCN-IR-NEXT: s_addc_u32 s11, s11, 0 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[10:11], v[0:1] ; GCN-IR-NEXT: s_mov_b64 s[14:15], s[8:9] -; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc ; GCN-IR-NEXT: s_cbranch_vccz .LBB9_3 ; GCN-IR-NEXT: .LBB9_4: ; %Flow3 ; GCN-IR-NEXT: s_lshl_b64 s[10:11], s[12:13], 1 @@ -1457,7 +1454,6 @@ ; GCN-IR-NEXT: s_addc_u32 s7, s7, 0 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1] ; GCN-IR-NEXT: s_mov_b64 s[10:11], s[2:3] -; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_3 ; GCN-IR-NEXT: .LBB10_4: ; %Flow5 ; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[8:9], 1 diff --git a/llvm/test/CodeGen/AMDGPU/trap-abis.ll b/llvm/test/CodeGen/AMDGPU/trap-abis.ll --- a/llvm/test/CodeGen/AMDGPU/trap-abis.ll +++ b/llvm/test/CodeGen/AMDGPU/trap-abis.ll @@ -363,7 +363,6 @@ ; NOHSA-TRAP-GFX900-V2-NEXT: global_load_dword v1, v0, s[0:1] glc ; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) ; NOHSA-TRAP-GFX900-V2-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 -; 
NOHSA-TRAP-GFX900-V2-NEXT: s_and_b64 vcc, exec, vcc ; NOHSA-TRAP-GFX900-V2-NEXT: s_cbranch_vccz .LBB1_2 ; NOHSA-TRAP-GFX900-V2-NEXT: ; %bb.1: ; %ret ; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 3 @@ -381,7 +380,6 @@ ; NOHSA-TRAP-GFX900-V3-NEXT: global_load_dword v1, v0, s[0:1] glc ; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) ; NOHSA-TRAP-GFX900-V3-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 -; NOHSA-TRAP-GFX900-V3-NEXT: s_and_b64 vcc, exec, vcc ; NOHSA-TRAP-GFX900-V3-NEXT: s_cbranch_vccz .LBB1_2 ; NOHSA-TRAP-GFX900-V3-NEXT: ; %bb.1: ; %ret ; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 3 @@ -399,7 +397,6 @@ ; NOHSA-TRAP-GFX900-V4-NEXT: global_load_dword v1, v0, s[0:1] glc ; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) ; NOHSA-TRAP-GFX900-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 -; NOHSA-TRAP-GFX900-V4-NEXT: s_and_b64 vcc, exec, vcc ; NOHSA-TRAP-GFX900-V4-NEXT: s_cbranch_vccz .LBB1_2 ; NOHSA-TRAP-GFX900-V4-NEXT: ; %bb.1: ; %ret ; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 3 @@ -486,7 +483,6 @@ ; HSA-TRAP-GFX803-V2-NEXT: flat_load_dword v0, v[0:1] glc ; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt vmcnt(0) ; HSA-TRAP-GFX803-V2-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0 -; HSA-TRAP-GFX803-V2-NEXT: s_and_b64 vcc, exec, vcc ; HSA-TRAP-GFX803-V2-NEXT: s_cbranch_vccz .LBB1_2 ; HSA-TRAP-GFX803-V2-NEXT: ; %bb.1: ; %ret ; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v0, s0 @@ -508,7 +504,6 @@ ; HSA-TRAP-GFX803-V3-NEXT: flat_load_dword v0, v[0:1] glc ; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0) ; HSA-TRAP-GFX803-V3-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0 -; HSA-TRAP-GFX803-V3-NEXT: s_and_b64 vcc, exec, vcc ; HSA-TRAP-GFX803-V3-NEXT: s_cbranch_vccz .LBB1_2 ; HSA-TRAP-GFX803-V3-NEXT: ; %bb.1: ; %ret ; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v0, s0 @@ -530,7 +525,6 @@ ; HSA-TRAP-GFX803-V4-NEXT: flat_load_dword v0, v[0:1] glc ; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0) ; HSA-TRAP-GFX803-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0 -; HSA-TRAP-GFX803-V4-NEXT: s_and_b64 vcc, exec, vcc ; HSA-TRAP-GFX803-V4-NEXT: 
s_cbranch_vccz .LBB1_2 ; HSA-TRAP-GFX803-V4-NEXT: ; %bb.1: ; %ret ; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v0, s0 @@ -619,7 +613,6 @@ ; HSA-TRAP-GFX900-V2-NEXT: global_load_dword v1, v0, s[0:1] glc ; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) ; HSA-TRAP-GFX900-V2-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 -; HSA-TRAP-GFX900-V2-NEXT: s_and_b64 vcc, exec, vcc ; HSA-TRAP-GFX900-V2-NEXT: s_cbranch_vccz .LBB1_2 ; HSA-TRAP-GFX900-V2-NEXT: ; %bb.1: ; %ret ; HSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 3 @@ -638,7 +631,6 @@ ; HSA-TRAP-GFX900-V3-NEXT: global_load_dword v1, v0, s[0:1] glc ; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) ; HSA-TRAP-GFX900-V3-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 -; HSA-TRAP-GFX900-V3-NEXT: s_and_b64 vcc, exec, vcc ; HSA-TRAP-GFX900-V3-NEXT: s_cbranch_vccz .LBB1_2 ; HSA-TRAP-GFX900-V3-NEXT: ; %bb.1: ; %ret ; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 3 @@ -657,7 +649,6 @@ ; HSA-TRAP-GFX900-V4-NEXT: global_load_dword v1, v0, s[0:1] glc ; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) ; HSA-TRAP-GFX900-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 -; HSA-TRAP-GFX900-V4-NEXT: s_and_b64 vcc, exec, vcc ; HSA-TRAP-GFX900-V4-NEXT: s_cbranch_vccz .LBB1_2 ; HSA-TRAP-GFX900-V4-NEXT: ; %bb.1: ; %ret ; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 3 @@ -743,7 +734,6 @@ ; HSA-NOTRAP-GFX900-V2-NEXT: global_load_dword v1, v0, s[0:1] glc ; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) ; HSA-NOTRAP-GFX900-V2-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 -; HSA-NOTRAP-GFX900-V2-NEXT: s_and_b64 vcc, exec, vcc ; HSA-NOTRAP-GFX900-V2-NEXT: s_cbranch_vccz .LBB1_2 ; HSA-NOTRAP-GFX900-V2-NEXT: ; %bb.1: ; %ret ; HSA-NOTRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 3 @@ -761,7 +751,6 @@ ; HSA-NOTRAP-GFX900-V3-NEXT: global_load_dword v1, v0, s[0:1] glc ; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) ; HSA-NOTRAP-GFX900-V3-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 -; HSA-NOTRAP-GFX900-V3-NEXT: s_and_b64 vcc, exec, vcc ; HSA-NOTRAP-GFX900-V3-NEXT: s_cbranch_vccz .LBB1_2 ; HSA-NOTRAP-GFX900-V3-NEXT: ; %bb.1: ; %ret ; 
HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 3 @@ -779,7 +768,6 @@ ; HSA-NOTRAP-GFX900-V4-NEXT: global_load_dword v1, v0, s[0:1] glc ; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) ; HSA-NOTRAP-GFX900-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 -; HSA-NOTRAP-GFX900-V4-NEXT: s_and_b64 vcc, exec, vcc ; HSA-NOTRAP-GFX900-V4-NEXT: s_cbranch_vccz .LBB1_2 ; HSA-NOTRAP-GFX900-V4-NEXT: ; %bb.1: ; %ret ; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 3 diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll --- a/llvm/test/CodeGen/AMDGPU/udiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll @@ -190,7 +190,6 @@ ; GCN-IR-NEXT: s_addc_u32 s3, s3, 0 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[2:3], v[0:1] ; GCN-IR-NEXT: s_mov_b64 s[10:11], s[6:7] -; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_3 ; GCN-IR-NEXT: .LBB0_4: ; %Flow6 ; GCN-IR-NEXT: s_lshl_b64 s[2:3], s[8:9], 1 @@ -879,7 +878,6 @@ ; GCN-IR-NEXT: s_addc_u32 s7, s7, 0 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1] ; GCN-IR-NEXT: s_mov_b64 s[10:11], s[0:1] -; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc ; GCN-IR-NEXT: s_cbranch_vccz .LBB7_3 ; GCN-IR-NEXT: .LBB7_4: ; %Flow3 ; GCN-IR-NEXT: s_lshl_b64 s[2:3], s[8:9], 1 @@ -1070,7 +1068,6 @@ ; GCN-IR-NEXT: s_addc_u32 s7, s7, 0 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1] ; GCN-IR-NEXT: s_mov_b64 s[10:11], s[4:5] -; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc ; GCN-IR-NEXT: s_cbranch_vccz .LBB8_3 ; GCN-IR-NEXT: .LBB8_4: ; %Flow5 ; GCN-IR-NEXT: s_lshl_b64 s[2:3], s[8:9], 1 @@ -1527,7 +1524,6 @@ ; GCN-IR-NEXT: s_addc_u32 s3, s3, 0 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[2:3], v[0:1] ; GCN-IR-NEXT: s_mov_b64 s[8:9], s[4:5] -; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc ; GCN-IR-NEXT: s_cbranch_vccz .LBB11_3 ; GCN-IR-NEXT: .LBB11_4: ; %Flow5 ; GCN-IR-NEXT: s_lshl_b64 s[2:3], s[6:7], 1 diff --git a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll --- a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll +++ 
b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll @@ -118,7 +118,6 @@ ; Using a floating-point value in an integer compare will cause the compare to ; be selected for the SALU and then later moved to the VALU. ; GCN: v_cmp_ne_u32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 5, [[CMP]] -; GCN: s_and_b64 vcc, exec, [[COND]] ; GCN: s_cbranch_vccnz [[ENDIF_LABEL:.L[0-9_A-Za-z]+]] ; GCN: buffer_store_dword ; GCN: [[ENDIF_LABEL]]: @@ -143,7 +142,6 @@ ; Using a floating-point value in an integer compare will cause the compare to ; be selected for the SALU and then later moved to the VALU. ; GCN: v_cmp_gt_u32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 6, [[CMP]] -; GCN: s_and_b64 vcc, exec, [[COND]] ; GCN: s_cbranch_vccnz [[ENDIF_LABEL:.L[0-9_A-Za-z]+]] ; GCN: buffer_store_dword ; GCN: [[ENDIF_LABEL]]: diff --git a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll --- a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll +++ b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll @@ -19,11 +19,9 @@ ; GCN-NEXT: flat_load_dword v0, v[0:1] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 21, v0 -; GCN-NEXT: s_and_b64 vcc, exec, vcc ; GCN-NEXT: s_cbranch_vccz .LBB0_3 ; GCN-NEXT: ; %bb.1: ; %bb4 ; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 9, v0 -; GCN-NEXT: s_and_b64 vcc, exec, vcc ; GCN-NEXT: s_cbranch_vccnz .LBB0_4 ; GCN-NEXT: ; %bb.2: ; %bb7 ; GCN-NEXT: s_getpc_b64 s[16:17] @@ -33,7 +31,6 @@ ; GCN-NEXT: s_branch .LBB0_7 ; GCN-NEXT: .LBB0_3: ; %bb2 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 21, v0 -; GCN-NEXT: s_and_b64 vcc, exec, vcc ; GCN-NEXT: s_cbranch_vccnz .LBB0_6 ; GCN-NEXT: .LBB0_4: ; %bb9 ; GCN-NEXT: s_getpc_b64 s[16:17] diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll --- a/llvm/test/CodeGen/AMDGPU/urem64.ll +++ b/llvm/test/CodeGen/AMDGPU/urem64.ll @@ -189,7 +189,6 @@ ; GCN-IR-NEXT: s_addc_u32 s9, s9, 0 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[8:9], v[0:1] ; GCN-IR-NEXT: 
s_mov_b64 s[12:13], s[6:7] -; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_3 ; GCN-IR-NEXT: .LBB0_4: ; %Flow6 ; GCN-IR-NEXT: s_lshl_b64 s[8:9], s[10:11], 1 @@ -887,7 +886,6 @@ ; GCN-IR-NEXT: s_addc_u32 s7, s7, 0 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1] ; GCN-IR-NEXT: s_mov_b64 s[10:11], s[4:5] -; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc ; GCN-IR-NEXT: s_cbranch_vccz .LBB6_3 ; GCN-IR-NEXT: .LBB6_4: ; %Flow5 ; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[8:9], 1 @@ -1077,7 +1075,6 @@ ; GCN-IR-NEXT: s_addc_u32 s7, s7, 0 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1] ; GCN-IR-NEXT: s_mov_b64 s[10:11], s[4:5] -; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc ; GCN-IR-NEXT: s_cbranch_vccz .LBB7_3 ; GCN-IR-NEXT: .LBB7_4: ; %Flow5 ; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[8:9], 1 diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -495,9 +495,7 @@ ; GCN-LABEL: {{^}}test_br_cc_f16: ; GFX1032: v_cmp_nlt_f16_e32 vcc_lo, -; GFX1032: s_and_b32 vcc_lo, exec_lo, vcc_lo ; GFX1064: v_cmp_nlt_f16_e32 vcc, -; GFX1064: s_and_b64 vcc, exec, vcc{{$}} ; GCN-NEXT: s_cbranch_vccnz define amdgpu_kernel void @test_br_cc_f16( half addrspace(1)* %r, diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll --- a/llvm/test/CodeGen/AMDGPU/wqm.ll +++ b/llvm/test/CodeGen/AMDGPU/wqm.ll @@ -1864,7 +1864,6 @@ ; GFX9-W64-NEXT: v_mov_b32_e32 v1, v5 ; GFX9-W64-NEXT: v_mov_b32_e32 v2, v6 ; GFX9-W64-NEXT: v_mov_b32_e32 v3, v7 -; GFX9-W64-NEXT: s_and_b64 vcc, exec, vcc ; GFX9-W64-NEXT: s_cbranch_vccz .LBB31_1 ; GFX9-W64-NEXT: ; %bb.3: ; GFX9-W64-NEXT: s_mov_b64 s[2:3], -1 @@ -1914,7 +1913,6 @@ ; GFX10-W32-NEXT: v_mov_b32_e32 v6, v2 ; GFX10-W32-NEXT: v_mov_b32_e32 v5, v1 ; GFX10-W32-NEXT: v_mov_b32_e32 v4, v0 -; GFX10-W32-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo ; GFX10-W32-NEXT: s_cbranch_vccz .LBB31_1 ; GFX10-W32-NEXT: ; %bb.3: ; GFX10-W32-NEXT: 
s_mov_b32 s1, -1