diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -81,8 +81,11 @@ RegClassOrBank.dyn_cast(); if (RC) { const LLT Ty = MRI.getType(Reg); - return RC->hasSuperClassEq(TRI.getBoolRC()) && - Ty.isValid() && Ty.getSizeInBits() == 1; + if (!Ty.isValid() || Ty.getSizeInBits() != 1) + return false; + // G_TRUNC s1 result is never vcc. + return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC && + RC->hasSuperClassEq(TRI.getBoolRC()); } const RegisterBank *RB = RegClassOrBank.get(); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll @@ -5352,13 +5352,15 @@ ; GFX10-NEXT: s_sub_i32 s0, s4, 64 ; GFX10-NEXT: s_cmp_lt_u32 s4, 64 ; GFX10-NEXT: v_lshrrev_b64 v[8:9], s0, v[2:3] -; GFX10-NEXT: s_cselect_b32 vcc_lo, 1, 0 +; GFX10-NEXT: s_cselect_b32 s1, 1, 0 ; GFX10-NEXT: s_cmp_eq_u32 s4, 0 ; GFX10-NEXT: v_or_b32_e32 v4, v4, v6 +; GFX10-NEXT: s_cselect_b32 s5, 1, 0 +; GFX10-NEXT: s_and_b32 s0, 1, s1 ; GFX10-NEXT: v_or_b32_e32 v5, v5, v7 -; GFX10-NEXT: s_cselect_b32 s0, 1, 0 -; GFX10-NEXT: s_and_b32 s1, 1, vcc_lo -; GFX10-NEXT: s_and_b32 s0, 1, s0 +; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 +; GFX10-NEXT: s_and_b32 s0, 1, s5 +; GFX10-NEXT: s_and_b32 s1, 1, s1 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], s4, v[2:3] ; GFX10-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc_lo @@ -5548,52 +5550,52 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_mov_b64 s[6:7], 0x7f ; GFX10-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] -; GFX10-NEXT: s_andn2_b64 s[10:11], s[6:7], s[4:5] -; GFX10-NEXT: s_sub_i32 s4, 64, s8 +; GFX10-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] ; GFX10-NEXT: s_sub_i32 s5, s8, 64 -; GFX10-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1] -; GFX10-NEXT: v_lshlrev_b64 v[6:7], s8, v[2:3] +; GFX10-NEXT: s_sub_i32 s6, 64, s8 ; GFX10-NEXT: s_cmp_lt_u32 s8, 64 -; GFX10-NEXT: v_lshlrev_b64 v[8:9], s8, v[0:1] -; GFX10-NEXT: s_cselect_b32 vcc_lo, 1, 0 +; GFX10-NEXT: v_lshrrev_b64 v[4:5], s6, v[0:1] +; GFX10-NEXT: v_lshlrev_b64 v[6:7], s8, v[2:3] +; GFX10-NEXT: s_cselect_b32 s9, 1, 0 ; GFX10-NEXT: s_cmp_eq_u32 s8, 0 +; GFX10-NEXT: v_lshlrev_b64 v[8:9], s8, v[0:1] +; GFX10-NEXT: s_cselect_b32 s10, 1, 0 +; GFX10-NEXT: s_and_b32 s6, 1, s9 +; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], 1 +; GFX10-NEXT: s_lshl_b32 s9, s2, 31 +; GFX10-NEXT: s_mov_b32 s8, s7 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], s5, v[0:1] -; GFX10-NEXT: s_cselect_b32 s6, 1, 0 -; GFX10-NEXT: s_and_b32 s4, 1, vcc_lo +; GFX10-NEXT: s_and_b32 s5, 1, s10 +; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9] +; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], 1 +; GFX10-NEXT: s_sub_i32 s10, s4, 64 +; GFX10-NEXT: s_sub_i32 s8, 64, s4 +; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s6 ; GFX10-NEXT: v_or_b32_e32 v4, v4, v6 ; GFX10-NEXT: v_or_b32_e32 v5, v5, v7 -; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, s4 -; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], 1 -; GFX10-NEXT: s_lshl_b32 s5, s2, 31 -; GFX10-NEXT: s_and_b32 s6, 1, s6 -; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], 1 -; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v5, s4 -; GFX10-NEXT: s_mov_b32 s4, s7 -; GFX10-NEXT: s_sub_i32 s11, s10, 64 -; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] -; GFX10-NEXT: s_sub_i32 s7, 64, s10 -; GFX10-NEXT: s_cmp_lt_u32 s10, 64 +; GFX10-NEXT: s_cmp_lt_u32 s4, 64 +; GFX10-NEXT: s_cselect_b32 s11, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 s4, 0 ; GFX10-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc_lo ; GFX10-NEXT: s_cselect_b32 s12, 1, 0 -; GFX10-NEXT: s_cmp_eq_u32 s10, 0 +; GFX10-NEXT: s_lshr_b64 s[6:7], s[0:1], s4 +; GFX10-NEXT: s_lshl_b64 s[8:9], s[2:3], s8 ; GFX10-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc_lo -; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s6 -; GFX10-NEXT: s_cselect_b32 s13, 1, 0 -; GFX10-NEXT: s_lshr_b64 s[4:5], s[0:1], s10 -; GFX10-NEXT: s_lshl_b64 s[6:7], s[2:3], s7 -; GFX10-NEXT: s_lshr_b64 s[8:9], s[2:3], s10 -; GFX10-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] -; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], s11 -; GFX10-NEXT: s_cmp_lg_u32 s12, 0 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s5 +; GFX10-NEXT: s_lshr_b64 s[4:5], s[2:3], s4 +; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] +; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 +; GFX10-NEXT: s_cmp_lg_u32 s11, 0 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc_lo -; GFX10-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3] -; GFX10-NEXT: s_cmp_lg_u32 s13, 0 +; GFX10-NEXT: s_cselect_b64 s[2:3], s[6:7], s[2:3] +; GFX10-NEXT: s_cmp_lg_u32 s12, 0 ; GFX10-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc_lo ; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] -; GFX10-NEXT: s_cmp_lg_u32 s12, 0 +; GFX10-NEXT: s_cmp_lg_u32 s11, 0 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v6 -; GFX10-NEXT: s_cselect_b64 s[2:3], s[8:9], 0 +; GFX10-NEXT: s_cselect_b64 s[2:3], s[4:5], 0 ; GFX10-NEXT: v_or_b32_e32 v1, s1, v7 ; GFX10-NEXT: v_or_b32_e32 v2, s2, v2 ; GFX10-NEXT: v_or_b32_e32 v3, s3, v3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll @@ -5470,13 +5470,15 @@ ; GFX10-NEXT: s_sub_i32 s0, s8, 64 ; GFX10-NEXT: s_cmp_lt_u32 s8, 64 ; GFX10-NEXT: v_lshrrev_b64 v[8:9], s0, v[2:3] -; GFX10-NEXT: s_cselect_b32 vcc_lo, 1, 0 +; GFX10-NEXT: s_cselect_b32 s1, 1, 0 ; GFX10-NEXT: s_cmp_eq_u32 s8, 0 ; GFX10-NEXT: v_or_b32_e32 v4, v4, v6 +; GFX10-NEXT: s_cselect_b32 s6, 1, 0 +; GFX10-NEXT: s_and_b32 s0, 1, s1 ; GFX10-NEXT: v_or_b32_e32 v5, v5, v7 -; GFX10-NEXT: s_cselect_b32 s0, 1, 0 -; GFX10-NEXT: s_and_b32 s1, 1, vcc_lo -; GFX10-NEXT: s_and_b32 s0, 1, s0 +; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 +; GFX10-NEXT: s_and_b32 s0, 1, s6 +; GFX10-NEXT: s_and_b32 s1, 1, s1 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], s8, v[2:3] ; GFX10-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc_lo @@ -5665,39 +5667,39 @@ ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 31, v1 ; GFX10-NEXT: s_mov_b64 s[6:7], 0x7f ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] -; GFX10-NEXT: s_andn2_b64 s[8:9], s[6:7], s[4:5] -; GFX10-NEXT: s_and_b64 s[6:7], s[4:5], s[6:7] +; GFX10-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] +; GFX10-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] ; GFX10-NEXT: v_or_b32_e32 v2, v2, v4 -; GFX10-NEXT: s_sub_i32 s4, 64, s8 -; GFX10-NEXT: s_sub_i32 s5, s8, 64 -; GFX10-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1] -; GFX10-NEXT: s_cmp_lt_u32 s8, 64 -; GFX10-NEXT: v_lshlrev_b64 v[6:7], s8, v[2:3] -; GFX10-NEXT: s_cselect_b32 vcc_lo, 1, 0 -; GFX10-NEXT: s_cmp_eq_u32 s8, 0 -; GFX10-NEXT: v_lshlrev_b64 v[8:9], s8, v[0:1] +; GFX10-NEXT: s_sub_i32 s6, 64, s4 +; GFX10-NEXT: s_sub_i32 s5, s4, 64 +; GFX10-NEXT: s_cmp_lt_u32 s4, 64 +; GFX10-NEXT: v_lshrrev_b64 v[4:5], s6, v[0:1] +; GFX10-NEXT: v_lshlrev_b64 v[6:7], s4, v[2:3] ; GFX10-NEXT: s_cselect_b32 s7, 1, 0 -; GFX10-NEXT: s_and_b32 s4, 1, vcc_lo +; GFX10-NEXT: s_cmp_eq_u32 s4, 0 +; GFX10-NEXT: v_lshlrev_b64 v[8:9], s4, v[0:1] +; GFX10-NEXT: s_cselect_b32 s9, 1, 0 +; GFX10-NEXT: s_and_b32 s4, 1, s7 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], s5, v[0:1] +; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4 ; GFX10-NEXT: v_or_b32_e32 v4, v4, v6 ; GFX10-NEXT: v_or_b32_e32 v5, v5, v7 -; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, s4 -; GFX10-NEXT: s_sub_i32 s10, s6, 64 +; GFX10-NEXT: s_and_b32 s4, 1, s9 +; GFX10-NEXT: s_sub_i32 s10, s8, 64 +; GFX10-NEXT: s_sub_i32 s6, 64, s8 +; GFX10-NEXT: s_cmp_lt_u32 s8, 64 ; GFX10-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc_lo +; GFX10-NEXT: s_cselect_b32 s11, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 s8, 0 ; GFX10-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v5, s4 -; GFX10-NEXT: s_and_b32 s4, 1, s7 -; GFX10-NEXT: s_sub_i32 s7, 64, s6 -; GFX10-NEXT: s_cmp_lt_u32 s6, 64 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4 -; GFX10-NEXT: s_cselect_b32 s11, 1, 0 -; GFX10-NEXT: s_cmp_eq_u32 s6, 0 ; GFX10-NEXT: s_cselect_b32 s12, 1, 0 -; GFX10-NEXT: s_lshr_b64 s[4:5], s[0:1], s6 -; GFX10-NEXT: s_lshl_b64 s[8:9], s[2:3], s7 -; GFX10-NEXT: s_lshr_b64 s[6:7], s[2:3], s6 -; GFX10-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] +; GFX10-NEXT: s_lshr_b64 s[4:5], s[0:1], s8 +; GFX10-NEXT: s_lshl_b64 s[6:7], s[2:3], s6 +; GFX10-NEXT: s_lshr_b64 s[8:9], s[2:3], s8 +; GFX10-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] ; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 ; GFX10-NEXT: s_cmp_lg_u32 s11, 0 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc_lo @@ -5707,7 +5709,7 @@ ; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] ; GFX10-NEXT: s_cmp_lg_u32 s11, 0 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v6 -; GFX10-NEXT: s_cselect_b64 s[2:3], s[6:7], 0 +; GFX10-NEXT: s_cselect_b64 s[2:3], s[8:9], 0 ; GFX10-NEXT: v_or_b32_e32 v1, s1, v7 ; GFX10-NEXT: v_or_b32_e32 v2, s2, v2 ; GFX10-NEXT: v_or_b32_e32 v3, s3, v3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/i1-copy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/i1-copy.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/i1-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/i1-copy.ll @@ -41,10 +41,10 @@ ; WAVE32-LABEL: i1_sgpr_to_vcc_copy: ; WAVE32: ; %bb.0: ; %main_body ; WAVE32-NEXT: s_cmp_eq_u32 s0, 2 -; WAVE32-NEXT: s_cselect_b32 vcc_lo, 1, 0 -; WAVE32-NEXT: s_and_b32 s0, 1, vcc_lo -; WAVE32-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; WAVE32-NEXT: s_cselect_b32 s0, 1, 0 +; WAVE32-NEXT: s_and_b32 s0, 1, s0 ; WAVE32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 +; WAVE32-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo ; WAVE32-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc_lo ; WAVE32-NEXT: exp mrt0 v0, v1, v0, v0 done vm ; WAVE32-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-i1-copy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-i1-copy.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-i1-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-i1-copy.mir @@ -109,14 +109,15 @@ ; WAVE32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2 ; WAVE32-NEXT: S_CMP_EQ_U32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; WAVE32-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE32-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY5]] - ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY6]], implicit $exec ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY5]], implicit-def $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec - ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY4]], 0, [[COPY3]], [[V_CMP_NE_U32_e64_]], implicit $exec + ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec + ; WAVE32-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY5]], implicit-def $scc + ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_1]], implicit $exec + ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY4]], 0, [[COPY3]], [[V_CMP_NE_U32_e64_1]], implicit $exec + ; WAVE32-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[DEF]] ; WAVE32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[DEF]] - ; WAVE32-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[DEF]] - ; WAVE32-NEXT: EXP_DONE 0, [[V_CNDMASK_B32_e64_]], [[V_CNDMASK_B32_e64_1]], [[COPY7]], [[COPY8]], -1, 0, 15, implicit $exec + ; WAVE32-NEXT: EXP_DONE 0, [[V_CNDMASK_B32_e64_]], [[V_CNDMASK_B32_e64_1]], [[COPY6]], [[COPY7]], -1, 0, 15, implicit $exec ; WAVE32-NEXT: S_ENDPGM 0 %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0