diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2269,6 +2269,34 @@ (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1), $a), (i32 1)) >; +def IMMBitSelConst : SDNodeXFormgetTargetConstant(1 << N->getZExtValue(), SDLoc(N), + MVT::i32); +}]>; + +// Matching separate SRL and TRUNC instructions +// with dependent operands (SRL dest is source of TRUNC) +// generates three instructions. However, by using bit shifts, +// the V_LSHRREV_B32_e64 result can be directly used in the +// operand of the V_AND_B32_e64 instruction: +// (trunc i32 (srl i32 $a, i32 $b)) -> +// v_and_b32_e64 $a, (1 << $b), $a +// v_cmp_ne_u32_e64 $a, 0, $a + +// Handle the VALU case. +def : GCNPat < + (i1 (DivergentUnaryFrag (i32 (srl i32:$a, (i32 imm:$b))))), + (V_CMP_NE_U32_e64 (V_AND_B32_e64 (i32 (IMMBitSelConst $b)), $a), + (i32 0)) +>; + +// Handle the scalar case. +def : GCNPat < + (i1 (UniformUnaryFrag (i32 (srl i32:$a, (i32 imm:$b))))), + (S_CMP_LG_U32 (S_AND_B32 (i32 (IMMBitSelConst $b)), $a), + (i32 0)) +>; + def : GCNPat < (i1 (DivergentUnaryFrag i64:$a)), (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1), diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll --- a/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll +++ b/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll @@ -9,18 +9,16 @@ ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GCN-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 killed [[S_MOV_B32_]], [[COPY1]], implicit $exec - ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, killed [[V_LSHRREV_B32_e64_]], implicit $exec - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_AND_B32_e64_]], 1, implicit $exec - ; GCN-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_EQ_U32_e64_]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 2, [[COPY1]], implicit $exec + ; GCN-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 killed [[V_AND_B32_e64_]], 0, implicit $exec + ; GCN-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_NE_U32_e64_]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: S_BRANCH %bb.1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1.out.true: ; GCN-NEXT: successors: %bb.2(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2 - ; GCN-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 killed [[S_MOV_B32_1]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2 + ; GCN-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 killed [[S_MOV_B32_]], [[COPY1]], implicit $exec ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2.UnifiedReturnBlock: @@ -56,14 +54,12 @@ ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]] ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2 ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORD_IMM]], killed [[S_MOV_B32_]], implicit-def dead $scc - ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GCN-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[S_LOAD_DWORD_IMM]], killed [[S_MOV_B32_1]], implicit-def dead $scc - ; GCN-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[S_LSHR_B32_]], implicit-def dead $scc - ; GCN-NEXT: S_CMP_EQ_U32 killed [[S_AND_B32_1]], 1, implicit-def $scc + ; GCN-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 2, [[S_LOAD_DWORD_IMM]], implicit-def dead $scc + ; GCN-NEXT: S_CMP_LG_U32 killed [[S_AND_B32_1]], 0, implicit-def $scc ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $scc ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]] - ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: S_CMP_EQ_U32 killed [[S_AND_B32_]], killed [[S_MOV_B32_2]], implicit-def $scc + ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GCN-NEXT: S_CMP_EQ_U32 killed [[S_AND_B32_]], killed [[S_MOV_B32_1]], implicit-def $scc ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GCN-NEXT: S_BRANCH %bb.1 ; GCN-NEXT: {{ $}} @@ -72,9 +68,9 @@ ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[COPY3]], killed [[S_MOV_B64_]], implicit-def dead $scc ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 - ; GCN-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GCN-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY5]], %subreg.sub0, killed [[COPY4]], %subreg.sub1, killed [[S_MOV_B32_4]], %subreg.sub2, killed [[S_MOV_B32_3]], %subreg.sub3 + ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 + ; GCN-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY5]], %subreg.sub0, killed [[COPY4]], %subreg.sub1, killed [[S_MOV_B32_3]], %subreg.sub2, killed [[S_MOV_B32_2]], %subreg.sub3 ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_XOR_B64_]], implicit $exec ; GCN-NEXT: BUFFER_STORE_BYTE_OFFSET killed [[V_CNDMASK_B32_e64_]], killed [[REG_SEQUENCE]], 0, 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.out.load, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -82,9 +78,9 @@ ; GCN-NEXT: bb.2.out.else: ; GCN-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 ; GCN-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 - ; GCN-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GCN-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY7]], %subreg.sub0, killed [[COPY6]], %subreg.sub1, killed [[S_MOV_B32_6]], %subreg.sub2, killed [[S_MOV_B32_5]], %subreg.sub3 + ; GCN-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 + ; GCN-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY7]], %subreg.sub0, killed [[COPY6]], %subreg.sub1, killed [[S_MOV_B32_5]], %subreg.sub2, killed [[S_MOV_B32_4]], %subreg.sub3 ; GCN-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[COPY3]], implicit $exec ; GCN-NEXT: BUFFER_STORE_BYTE_OFFSET killed [[V_CNDMASK_B32_e64_1]], killed [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.out.load, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll --- a/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll @@ -15,13 +15,11 @@ ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY2]], %subreg.sub0, killed [[COPY1]], %subreg.sub1, killed [[S_MOV_B32_1]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3 ; GCN-NEXT: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[S_LOAD_DWORD_IMM]] - ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; GCN-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[S_LOAD_DWORD_IMM]], killed [[S_MOV_B32_2]], implicit-def dead $scc - ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[S_LSHR_B32_]], implicit-def dead $scc - ; GCN-NEXT: S_CMP_EQ_U32 killed [[S_AND_B32_]], 1, implicit-def $scc + ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 65536, [[S_LOAD_DWORD_IMM]], implicit-def dead $scc + ; GCN-NEXT: S_CMP_LG_U32 killed [[S_AND_B32_]], 0, implicit-def $scc ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $scc - ; GCN-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: S_CMP_LT_I32 killed [[S_SEXT_I32_I16_]], killed [[S_MOV_B32_3]], implicit-def $scc + ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GCN-NEXT: S_CMP_LT_I32 killed [[S_SEXT_I32_I16_]], killed [[S_MOV_B32_2]], implicit-def $scc ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $scc ; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[COPY4]], killed [[COPY3]], implicit-def dead $scc ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B64_]], implicit $exec @@ -173,4 +171,3 @@ %select = select i1 %setcc, i1 true, i1 %z ret i1 %select } -