diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2269,6 +2269,31 @@
   (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1), $a), (i32 1))
 >;
 
+// Restrict the range to prevent using an additional VGPR
+// for the shifted value.
+def IMMBitSelRange : ImmLeaf<i32, [{
+  return Imm > 0 && Imm < 16;
+}]>;
+
+def IMMBitSelConst : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant((1 << N->getZExtValue()), SDLoc(N),
+                                   MVT::i32);
+}]>;
+
+// Matching separate SRL and TRUNC instructions
+// with dependent operands (the SRL result is the TRUNC source)
+// generates three instructions. However, by shifting the constant 1
+// left by the immediate shift amount instead, the SRL is folded away
+// and the unshifted value feeds V_AND_B32_e64 directly:
+// (trunc i32 (srl i32 $a, i32 $b)) ->
+//     v_and_b32_e64 $tmp, (1 << $b), $a
+//     v_cmp_eq_u32_e64 $dst, (1 << $b), $tmp
+def : GCNPat <
+  (i1 (trunc (i32 (srl i32:$a, IMMBitSelRange:$b)))),
+  (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 (IMMBitSelConst $b)), $a),
+                    (i32 (IMMBitSelConst $b)))
+>;
+
 def : GCNPat <
   (i1 (DivergentUnaryFrag<trunc> i64:$a)),
   (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1),
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -stop-after=amdgpu-isel -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: bb.0.entry:
+; GCN-NOT: V_LSHRREV_B32_e64
+; GCN: V_AND_B32_e64 2
+; GCN: V_CMP_EQ_U32_e64 killed {{.*}}, 2
+define i32 @opt_lshr_and_cmp(i32 %x) {
+entry:
+  %0 = and i32 %x, 2
+  %1 = icmp eq i32 %0, 0
+  %2 = xor i1 %1, -1
+  br i1 %2, label %out.true, label %out.else
+
+out.true:
+  %3 = shl i32 %x, 2
+  ret i32 %3
+
+out.else:
+  ret i32 %x
+}
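
Reviewer sketch (not part of the patch): the new GCNPat relies on the identity
(x >> b) & 1  ==  ((x & (1 << b)) == (1 << b)), so IR such as the following,
where the function name, the shift amount of 1, and the assumption of a
divergent (VGPR) input are illustrative, should select to a V_AND_B32_e64 /
V_CMP_EQ_U32_e64 pair against the shifted constant (1 << 1) = 2, with no
V_LSHRREV_B32_e64:

; Illustrative only; exercises the new (trunc (srl ...)) pattern directly.
define i1 @lshr_trunc_bit1(i32 %x) {
entry:
  %shifted = lshr i32 %x, 1          ; shift amount 1 lies inside IMMBitSelRange (0 < Imm < 16)
  %bit = trunc i32 %shifted to i1    ; bit test: (%x & 2) == 2
  ret i1 %bit
}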