diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2510,6 +2510,13 @@
   let SubtargetPredicate = NotHasAddNoCarryInsts;
 }
 
+// Eliminate `and` for constrained shift since the target shift instructions are
+// constrained.
+class ConstrainedI32ShiftPat<SDPatternOperator src, Instruction dst>
+  : AMDGPUPat<(src i32:$a, (and i32:$b, 31)), (dst $b, $a)>;
+def : ConstrainedI32ShiftPat<shl, V_LSHLREV_B32_e64>;
+def : ConstrainedI32ShiftPat<srl, V_LSHRREV_B32_e64>;
+def : ConstrainedI32ShiftPat<sra, V_ASHRREV_I32_e64>;
 
 // Avoid pointlessly materializing a constant in VGPR.
 // FIXME: Should also do this for readlane, but tablegen crashes on
diff --git a/llvm/test/CodeGen/AMDGPU/shift-i64-opts.ll b/llvm/test/CodeGen/AMDGPU/shift-i64-opts.ll
--- a/llvm/test/CodeGen/AMDGPU/shift-i64-opts.ll
+++ b/llvm/test/CodeGen/AMDGPU/shift-i64-opts.ll
@@ -245,8 +245,7 @@
 }
 
 ; GCN-LABEL: {{^}}trunc_shl_and31:
-; GCN: s_and_b32 s[[AMT:[0-9]+]], s{{[0-9]+}}, 31
-; GCN: v_lshlrev_b32_e32 v{{[0-9]+}}, s[[AMT]], v{{[0-9]+}}
+; GCN: v_lshlrev_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
 ; GCN-NOT: v_lshl_b64
 ; GCN-NOT: v_lshlrev_b64
 define amdgpu_kernel void @trunc_shl_and31(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
diff --git a/llvm/test/CodeGen/AMDGPU/shift-opts.ll b/llvm/test/CodeGen/AMDGPU/shift-opts.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/shift-opts.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx908 < %s | FileCheck %s
+
+define i32 @constrained_shift(i32 %a, i32 %b) {
+; CHECK-LABEL: constrained_shift:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_lshlrev_b32_e32 v2, v1, v0
+; CHECK-NEXT:    v_lshrrev_b32_e32 v3, v1, v0
+; CHECK-NEXT:    v_ashrrev_i32_e32 v0, v1, v0
+; CHECK-NEXT:    v_add3_u32 v0, v2, v3, v0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %and = and i32 %b, 31
+  %shl = shl i32 %a, %and
+  %lshr = lshr i32 %a, %and
+  %ashr = ashr i32 %a, %and
+  %ret.0 = add i32 %shl, %lshr
+  %ret = add i32 %ret.0, %ashr
+  ret i32 %ret
+}