diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2577,6 +2577,23 @@
   (V_BFE_I32_e64 $src, (i32 0), $width)
 >;
 
+// An OpenCL front-end, as per
+// https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_C.html#operators-shift
+// , can emit (and (sub 32, i32:$x), 31) instead of (sub 32, i32:$x) as the
+// second operand of a 32 bit shift expression. This operand can be transformed
+// to (and (sub 0, i32:$x), 31) by the optimizer.
+// Logical shift right of a left shift by the same masked amount selects to an
+// unsigned bitfield extract (V_BFE_U32).
+def : AMDGPUPat <
+  (DivergentBinFrag<srl> (shl_oneuse i32:$src, (and (sub 0, i32:$width), 31)),
+                         (and (sub 0, i32:$width), 31)),
+  (V_BFE_U32_e64 $src, (i32 0), $width)
+>;
+
+// Arithmetic shift right of the same shl pattern sign-extends, so it selects
+// to the signed bitfield extract (V_BFE_I32).
+def : AMDGPUPat <
+  (DivergentBinFrag<sra> (shl_oneuse i32:$src, (and (sub 0, i32:$width), 31)),
+                         (and (sub 0, i32:$width), 31)),
+  (V_BFE_I32_e64 $src, (i32 0), $width)
+>;
+
 // SHA-256 Ma patterns
 
 // ((x & z) | (y & (x | z))) -> BFI (XOR x, y), z, y
diff --git a/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll b/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll
--- a/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll
+++ b/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll
@@ -19,6 +19,16 @@
   ret void
 }
 
+; shl followed by ashr with an (and (sub 0, %bits), 31) shift amount must
+; select to a signed bitfield extract.
+; GCN-LABEL: {{^}}shl_mask:
+; GCN: v_bfe_i32 v0, v0, 0, v1
+define i32 @shl_mask(i32 %a, i32 %bits) {
+  %sub = sub i32 0, %bits
+  %shl.mask = and i32 %sub, 31
+  %shl = shl i32 %a, %shl.mask
+  %shr = ashr i32 %shl, %shl.mask
+  ret i32 %shr
+}
+
 ; GCN-LABEL: {{^}}v_ubfe_sub_multi_use_shl_i32:
 ; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
 ; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]