diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -116,6 +116,28 @@ NameOverride<"vmulq">; } +let params = !listconcat(T.Int16, T.Int32) in { + let pnt = PNT_None in { + def vmvnq_n: Intrinsic; + } + defm vmvnq: IntrinsicMX; + let pnt = PNT_NType in { + def vbicq_n: Intrinsic; + def vorrq_n: Intrinsic; + } + def vbicq_m_n: Intrinsic< + Vector, (args Vector:$v, imm_simd_restrictive:$imm, Predicate:$pred), + (IRInt<"bic_imm_predicated", [Vector, Predicate]> $v, (u32 $imm), $pred)>; + def vorrq_m_n: Intrinsic< + Vector, (args Vector:$v, imm_simd_restrictive:$imm, Predicate:$pred), + (IRInt<"orr_imm_predicated", [Vector, Predicate]> $v, (u32 $imm), $pred)>; +} + // The bitcasting below is not overcomplicating the IR because while // Vector and UVector may be different vector types at the C level i.e. // vectors of same size signed/unsigned ints. Once they're lowered diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td --- a/clang/include/clang/Basic/arm_mve_defs.td +++ b/clang/include/clang/Basic/arm_mve_defs.td @@ -319,6 +319,7 @@ int base = base_; Type type = type_; } +def IB_ExtraArg_LaneSize; // ----------------------------------------------------------------------------- // End-user definitions for immediate arguments. @@ -327,11 +328,13 @@ // intrinsics like vmvnq or vorrq. imm_simd_restrictive has to be an 8-bit // value shifted left by a whole number of bytes; imm_simd_vmvn can also be of // the form 0xXXFF for some byte value XX. -def imm_simd_restrictive : Immediate { +def imm_simd_restrictive : Immediate { let extra = "ShiftedByte"; + let extraarg = "!lanesize"; } -def imm_simd_vmvn : Immediate { +def imm_simd_vmvn : Immediate { let extra = "ShiftedByteOrXXFF"; + let extraarg = "!lanesize"; } // imm_1toN can take any value from 1 to N inclusive, where N is the number of @@ -457,26 +460,31 @@ // A wrapper to define both _m and _x versions of a predicated // intrinsic. +// +// We provide optional parameters to override the polymorphic name +// types separately for the _m and _x variants, because sometimes they +// polymorph differently (typically because the type of the inactive +// parameter can be used as a disambiguator if it's present). multiclass IntrinsicMX { // The _m variant takes an initial parameter called $inactive, which // provides the input value of the output register, i.e. all the // inactive lanes in the predicated operation take their values from // this. def "_m" # nameSuffix: - Intrinsic; + Intrinsic { + let pnt = pnt_m; + } foreach unusedVar = !if(!eq(wantXVariant, 1), [1], []) in { // The _x variant leaves off that parameter, and simply uses an // undef value of the same type. + def "_x" # nameSuffix: - Intrinsic { - // Allow overriding of the polymorphic name type, because - // sometimes the _m and _x variants polymorph differently - // (typically because the type of the inactive parameter can be - // used as a disambiguator if it's present). 
+ Intrinsic { let pnt = pnt_x; } } diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -11601,8 +11601,10 @@ bool SemaBuiltinConstantArgMultiple(CallExpr *TheCall, int ArgNum, unsigned Multiple); bool SemaBuiltinConstantArgPower2(CallExpr *TheCall, int ArgNum); - bool SemaBuiltinConstantArgShiftedByte(CallExpr *TheCall, int ArgNum); - bool SemaBuiltinConstantArgShiftedByteOrXXFF(CallExpr *TheCall, int ArgNum); + bool SemaBuiltinConstantArgShiftedByte(CallExpr *TheCall, int ArgNum, + unsigned ArgBits); + bool SemaBuiltinConstantArgShiftedByteOrXXFF(CallExpr *TheCall, int ArgNum, + unsigned ArgBits); bool SemaBuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall, int ArgNum, unsigned ExpectedFieldNum, bool AllowName); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -6412,7 +6412,8 @@ /// SemaBuiltinConstantArgShiftedByte - Check if argument ArgNum of TheCall is /// a constant expression representing an arbitrary byte value shifted left by /// a multiple of 8 bits. -bool Sema::SemaBuiltinConstantArgShiftedByte(CallExpr *TheCall, int ArgNum) { +bool Sema::SemaBuiltinConstantArgShiftedByte(CallExpr *TheCall, int ArgNum, + unsigned ArgBits) { llvm::APSInt Result; // We can't check the value of a dependent argument. @@ -6424,6 +6425,10 @@ if (SemaBuiltinConstantArg(TheCall, ArgNum, Result)) return true; + // Truncate to the given size. + Result = Result.getLoBits(ArgBits); + Result.setIsUnsigned(true); + if (IsShiftedByte(Result)) return false; @@ -6437,7 +6442,8 @@ /// 0x00FF, 0x01FF, ..., 0xFFFF). This strange range check is needed for some /// Arm MVE intrinsics. bool Sema::SemaBuiltinConstantArgShiftedByteOrXXFF(CallExpr *TheCall, - int ArgNum) { + int ArgNum, + unsigned ArgBits) { llvm::APSInt Result; // We can't check the value of a dependent argument. @@ -6449,6 +6455,10 @@ if (SemaBuiltinConstantArg(TheCall, ArgNum, Result)) return true; + // Truncate to the given size. + Result = Result.getLoBits(ArgBits); + Result.setIsUnsigned(true); + // Check to see if it's in either of the required forms. 
if (IsShiftedByte(Result) || (Result > 0 && Result < 0x10000 && (Result & 0xFF) == 0xFF)) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/bitwise-imm.c b/clang/test/CodeGen/arm-mve-intrinsics/bitwise-imm.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/bitwise-imm.c @@ -0,0 +1,394 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include + +// CHECK-LABEL: @test_vbicq_n_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = and <8 x i16> [[A:%.*]], +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +int16x8_t test_vbicq_n_s16(int16x8_t a) +{ +#ifdef POLYMORPHIC + return vbicq(a, 0xd500); +#else /* POLYMORPHIC */ + return vbicq_n_s16(a, 0xd500); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vbicq_n_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = and <4 x i32> [[A:%.*]], +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +int32x4_t test_vbicq_n_s32(int32x4_t a) +{ +#ifdef POLYMORPHIC + return vbicq(a, 0xfb); +#else /* POLYMORPHIC */ + return vbicq_n_s32(a, 0xfb); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vbicq_n_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = and <8 x i16> [[A:%.*]], +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +uint16x8_t test_vbicq_n_u16(uint16x8_t a) +{ +#ifdef POLYMORPHIC + return vbicq(a, 0xf2); +#else /* POLYMORPHIC */ + return vbicq_n_u16(a, 0xf2); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vbicq_n_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = and <4 x i32> [[A:%.*]], +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +uint32x4_t test_vbicq_n_u32(uint32x4_t a) +{ +#ifdef POLYMORPHIC + return vbicq(a, 0x2000); +#else /* POLYMORPHIC */ + return vbicq_n_u32(a, 0x2000); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vorrq_n_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = or <8 x i16> [[A:%.*]], +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +int16x8_t test_vorrq_n_s16(int16x8_t a) +{ +#ifdef POLYMORPHIC + return vorrq(a, 0xc3); +#else /* POLYMORPHIC */ + return vorrq_n_s16(a, 0xc3); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vorrq_n_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = or <4 x i32> [[A:%.*]], +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +int32x4_t test_vorrq_n_s32(int32x4_t a) +{ +#ifdef POLYMORPHIC + return vorrq(a, 0x10000); +#else /* POLYMORPHIC */ + return vorrq_n_s32(a, 0x10000); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vorrq_n_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = or <8 x i16> [[A:%.*]], +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +uint16x8_t test_vorrq_n_u16(uint16x8_t a) +{ +#ifdef POLYMORPHIC + return vorrq(a, 0xf000); +#else /* POLYMORPHIC */ + return vorrq_n_u16(a, 0xf000); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vorrq_n_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = or <4 x i32> [[A:%.*]], +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +uint32x4_t test_vorrq_n_u32(uint32x4_t a) +{ +#ifdef POLYMORPHIC + return vorrq(a, 0x890000); +#else /* POLYMORPHIC */ + return vorrq_n_u32(a, 0x890000); +#endif /* POLYMORPHIC */ +} + 
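The constants used in these tests are exactly the ones the new Sema checks admit: after truncating the immediate to the lane width, it must be an 8-bit value shifted left by a whole number of bytes (vmvnq additionally accepts the 0x??FF forms). A minimal standalone sketch of that rule, using illustrative helper names rather than the actual Sema entry points:

  #include <cstdint>

  // Accepts 0 and any single byte shifted left by a whole number of
  // bytes within a lane of LaneBits bits (16 or 32 here).
  static bool isShiftedByteImm(uint64_t V, unsigned LaneBits) {
    V &= (1ULL << LaneBits) - 1; // mirrors the getLoBits() truncation in Sema
    for (unsigned Shift = 0; Shift < LaneBits; Shift += 8)
      if ((V & ~(0xFFULL << Shift)) == 0)
        return true;
    return false;
  }

  // vmvnq_n additionally accepts 0x00FF..0xFFFF with the low byte all ones.
  static bool isShiftedByteOrXXFFImm(uint64_t V, unsigned LaneBits) {
    V &= (1ULL << LaneBits) - 1;
    return isShiftedByteImm(V, LaneBits) ||
           (V > 0 && V < 0x10000 && (V & 0xFF) == 0xFF);
  }

For example, isShiftedByteImm(0x0101, 16) is false, which is why the 0x0101 cases in the Sema test further down are diagnosed, while isShiftedByteOrXXFFImm(0x01FF, 16) is true and vmvnq_n_u16(0x01FF) is accepted.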
+// CHECK-LABEL: @test_vmvnq_n_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret <8 x i16> +// +int16x8_t test_vmvnq_n_s16() +{ + return vmvnq_n_s16(0x9500); +} + +// CHECK-LABEL: @test_vmvnq_n_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret <4 x i32> +// +int32x4_t test_vmvnq_n_s32() +{ + return vmvnq_n_s32(0x550000); +} + +// CHECK-LABEL: @test_vmvnq_n_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret <8 x i16> +// +uint16x8_t test_vmvnq_n_u16() +{ + return vmvnq_n_u16(0x4900); +} + +// CHECK-LABEL: @test_vmvnq_n_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret <4 x i32> +// +uint32x4_t test_vmvnq_n_u32() +{ + return vmvnq_n_u32(0xc3000000); +} + +// CHECK-LABEL: @test_vbicq_m_n_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.bic.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 11264, <8 x i1> [[TMP1]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vbicq_m_n_s16(int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vbicq_m_n(a, 0x2c00, p); +#else /* POLYMORPHIC */ + return vbicq_m_n_s16(a, 0x2c00, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vbicq_m_n_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.bic.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 13893632, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vbicq_m_n_s32(int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vbicq_m_n(a, 0xd40000, p); +#else /* POLYMORPHIC */ + return vbicq_m_n_s32(a, 0xd40000, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vbicq_m_n_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.bic.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 36, <8 x i1> [[TMP1]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vbicq_m_n_u16(uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vbicq_m_n(a, 0x24, p); +#else /* POLYMORPHIC */ + return vbicq_m_n_u16(a, 0x24, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vbicq_m_n_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.bic.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 1644167168, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vbicq_m_n_u32(uint32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vbicq_m_n(a, 0x62000000, p); +#else /* POLYMORPHIC */ + return vbicq_m_n_u32(a, 0x62000000, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vorrq_m_n_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.orr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 13568, <8 x i1> [[TMP1]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vorrq_m_n_s16(int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return 
vorrq_m_n(a, 0x3500, p); +#else /* POLYMORPHIC */ + return vorrq_m_n_s16(a, 0x3500, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vorrq_m_n_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.orr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 654311424, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vorrq_m_n_s32(int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vorrq_m_n(a, 0x27000000, p); +#else /* POLYMORPHIC */ + return vorrq_m_n_s32(a, 0x27000000, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vorrq_m_n_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.orr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 175, <8 x i1> [[TMP1]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vorrq_m_n_u16(uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vorrq_m_n(a, 0xaf, p); +#else /* POLYMORPHIC */ + return vorrq_m_n_u16(a, 0xaf, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vorrq_m_n_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.orr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 89, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vorrq_m_n_u32(uint32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vorrq_m_n(a, 0x59, p); +#else /* POLYMORPHIC */ + return vorrq_m_n_u32(a, 0x59, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmvnq_m_n_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> , <8 x i16> [[INACTIVE:%.*]] +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vmvnq_m_n_s16(int16x8_t inactive, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmvnq_m(inactive, 0xf00, p); +#else /* POLYMORPHIC */ + return vmvnq_m_n_s16(inactive, 0xf00, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmvnq_m_n_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> , <4 x i32> [[INACTIVE:%.*]] +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vmvnq_m_n_s32(int32x4_t inactive, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmvnq_m(inactive, 0x4a00, p); +#else /* POLYMORPHIC */ + return vmvnq_m_n_s32(inactive, 0x4a00, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmvnq_m_n_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> , <8 x i16> [[INACTIVE:%.*]] +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vmvnq_m_n_u16(uint16x8_t inactive, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmvnq_m(inactive, 0xa500, p); +#else /* 
POLYMORPHIC */ + return vmvnq_m_n_u16(inactive, 0xa500, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmvnq_m_n_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> , <4 x i32> [[INACTIVE:%.*]] +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vmvnq_m_n_u32(uint32x4_t inactive, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmvnq_m(inactive, 0xf800, p); +#else /* POLYMORPHIC */ + return vmvnq_m_n_u32(inactive, 0xf800, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmvnq_x_n_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> , <8 x i16> undef +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vmvnq_x_n_s16(mve_pred16_t p) +{ + return vmvnq_x_n_s16(0xfd00, p); +} + +// CHECK-LABEL: @test_vmvnq_x_n_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> , <4 x i32> undef +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vmvnq_x_n_s32(mve_pred16_t p) +{ + return vmvnq_x_n_s32(0xba0000, p); +} + +// CHECK-LABEL: @test_vmvnq_x_n_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> , <8 x i16> undef +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vmvnq_x_n_u16(mve_pred16_t p) +{ + return vmvnq_x_n_u16(0x5400, p); +} + +// CHECK-LABEL: @test_vmvnq_x_n_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> , <4 x i32> undef +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vmvnq_x_n_u32(mve_pred16_t p) +{ + return vmvnq_x_n_u32(0x1300, p); +} + diff --git a/clang/test/Sema/arm-mve-immediates.c b/clang/test/Sema/arm-mve-immediates.c --- a/clang/test/Sema/arm-mve-immediates.c +++ b/clang/test/Sema/arm-mve-immediates.c @@ -203,3 +203,73 @@ vsriq(vw, vw, 0); // expected-error {{argument value 0 is outside the valid range [1, 32]}} vsriq(vw, vw, 33); // expected-error {{argument value 33 is outside the valid range [1, 32]}} } + +void test_simd_bic_orr(int16x8_t h, int32x4_t w) +{ + h = vbicq(h, 0x0000); + h = vbicq(h, 0x0001); + h = vbicq(h, 0x00FF); + h = vbicq(h, 0x0100); + h = vbicq(h, 0x0101); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}} + h = vbicq(h, 0x01FF); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}} + h = vbicq(h, 0xFF00); + + w = vbicq(w, 0x00000000); + w = vbicq(w, 0x00000001); + w = vbicq(w, 0x000000FF); + w = vbicq(w, 0x00000100); + w = vbicq(w, 0x0000FF00); + w = vbicq(w, 0x00010000); + w = vbicq(w, 0x00FF0000); + w = vbicq(w, 0x01000000); + w = vbicq(w, 0xFF000000); + w = vbicq(w, 0x01000001); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}} + w = vbicq(w, 
0x01FFFFFF); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}} + + h = vorrq(h, 0x0000); + h = vorrq(h, 0x0001); + h = vorrq(h, 0x00FF); + h = vorrq(h, 0x0100); + h = vorrq(h, 0x0101); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}} + h = vorrq(h, 0x01FF); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}} + h = vorrq(h, 0xFF00); + + w = vorrq(w, 0x00000000); + w = vorrq(w, 0x00000001); + w = vorrq(w, 0x000000FF); + w = vorrq(w, 0x00000100); + w = vorrq(w, 0x0000FF00); + w = vorrq(w, 0x00010000); + w = vorrq(w, 0x00FF0000); + w = vorrq(w, 0x01000000); + w = vorrq(w, 0xFF000000); + w = vorrq(w, 0x01000001); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}} + w = vorrq(w, 0x01FFFFFF); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}} +} + +void test_simd_vmvn(void) +{ + uint16x8_t h; + h = vmvnq_n_u16(0x0000); + h = vmvnq_n_u16(0x0001); + h = vmvnq_n_u16(0x00FF); + h = vmvnq_n_u16(0x0100); + h = vmvnq_n_u16(0x0101); // expected-error {{argument should be an 8-bit value shifted by a multiple of 8 bits, or in the form 0x??FF}} + h = vmvnq_n_u16(0x01FF); + h = vmvnq_n_u16(0xFF00); + + uint32x4_t w; + w = vmvnq_n_u32(0x00000000); + w = vmvnq_n_u32(0x00000001); + w = vmvnq_n_u32(0x000000FF); + w = vmvnq_n_u32(0x00000100); + w = vmvnq_n_u32(0x0000FF00); + w = vmvnq_n_u32(0x00010000); + w = vmvnq_n_u32(0x00FF0000); + w = vmvnq_n_u32(0x01000000); + w = vmvnq_n_u32(0xFF000000); + w = vmvnq_n_u32(0x01000001); // expected-error {{argument should be an 8-bit value shifted by a multiple of 8 bits, or in the form 0x??FF}} + w = vmvnq_n_u32(0x01FFFFFF); // expected-error {{argument should be an 8-bit value shifted by a multiple of 8 bits, or in the form 0x??FF}} + w = vmvnq_n_u32(0x0001FFFF); // expected-error {{argument should be an 8-bit value shifted by a multiple of 8 bits, or in the form 0x??FF}} + w = vmvnq_n_u32(0x000001FF); +} diff --git a/clang/utils/TableGen/MveEmitter.cpp b/clang/utils/TableGen/MveEmitter.cpp --- a/clang/utils/TableGen/MveEmitter.cpp +++ b/clang/utils/TableGen/MveEmitter.cpp @@ -882,38 +882,38 @@ break; case ImmediateArg::BoundsType::UInt: lo = 0; - hi = IA.i1; + hi = llvm::APInt::getMaxValue(IA.i1).zext(128); break; } - llvm::APInt typelo, typehi; - unsigned Bits = IA.ArgType->sizeInBits(); - if (cast(IA.ArgType)->kind() == ScalarTypeKind::SignedInt) { - typelo = llvm::APInt::getSignedMinValue(Bits).sext(128); - typehi = llvm::APInt::getSignedMaxValue(Bits).sext(128); - } else { - typelo = llvm::APInt::getMinValue(Bits).zext(128); - typehi = llvm::APInt::getMaxValue(Bits).zext(128); - } - std::string Index = utostr(kv.first); - if (lo.sle(typelo) && hi.sge(typehi)) - SemaChecks.push_back("SemaBuiltinConstantArg(TheCall, " + Index + ")"); - else + unsigned ArgTypeBits = IA.ArgType->sizeInBits(); + llvm::APInt ArgTypeRange = llvm::APInt::getMaxValue(ArgTypeBits).zext(128); + llvm::APInt ActualRange = (hi-lo).trunc(64).sext(128); + if (ActualRange.ult(ArgTypeRange)) SemaChecks.push_back("SemaBuiltinConstantArgRange(TheCall, " + Index + ", " + signedHexLiteral(lo) + ", " + signedHexLiteral(hi) + ")"); if (!IA.ExtraCheckType.empty()) { std::string Suffix; - if (!IA.ExtraCheckArgs.empty()) - Suffix = (Twine(", ") + IA.ExtraCheckArgs).str(); + if (!IA.ExtraCheckArgs.empty()) { + std::string tmp; + StringRef Arg = IA.ExtraCheckArgs; + if (Arg == "!lanesize") 
{
+          tmp = utostr(IA.ArgType->sizeInBits());
+          Arg = tmp;
+        }
+        Suffix = (Twine(", ") + Arg).str();
+      }
       SemaChecks.push_back((Twine("SemaBuiltinConstantArg") + IA.ExtraCheckType +
                             "(TheCall, " + Index + Suffix + ")")
                                .str());
     }
+
+    assert(!SemaChecks.empty());
   }
   if (SemaChecks.empty())
     return "";
diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td
--- a/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -1133,4 +1133,10 @@
                             [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                              llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>],
                             llvm_anyvector_ty>;
+
+def int_arm_mve_bic_imm_predicated: Intrinsic<[llvm_anyvector_ty],
+   [LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>;
+def int_arm_mve_orr_imm_predicated: Intrinsic<[llvm_anyvector_ty],
+   [LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>;
+
 } // end TargetPrefix
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -268,6 +268,20 @@
   void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                      const uint16_t *const *Opcodes);

+  /// SelectMVE_VBIC_VORR_imm - Select immediate forms of MVE VBIC and
+  /// VORR instructions, which have different MC ids depending on the
+  /// shape of the constant. N should be an ARMISD::VBICIMM / VORRIMM
+  /// node; the array Opcodes has 6 entries, used for 8-bit constants
+  /// shifted left by 0, 8, 16 or 24 bits in a 32-bit word, and 0 or 8
+  /// bits in a 16-bit word, respectively.
+  void SelectMVE_VBIC_VORR_imm(SDNode *N, const uint16_t *Opcodes);
+
+  /// SelectMVE_VBIC_VORR_imm_predicated - Select immediate MVE VBIC
+  /// and VORR, but this time the predicated forms, starting from a
+  /// node that is a call to the arm.mve.{bic,orr}.imm.predicated
+  /// intrinsic.
+  void SelectMVE_VBIC_VORR_imm_predicated(SDNode *N, const uint16_t *Opcodes);
+
   /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
   /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
   /// for loading D registers.
@@ -2581,6 +2595,91 @@
   CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
 }

+static const uint16_t MVE_VBIC_Opcodes[] = {
+    ARM::MVE_VBICIZ0v4i32,  ARM::MVE_VBICIZ8v4i32,
+    ARM::MVE_VBICIZ16v4i32, ARM::MVE_VBICIZ24v4i32,
+    ARM::MVE_VBICIZ0v8i16,  ARM::MVE_VBICIZ8v8i16,
+};
+static const uint16_t MVE_VORR_Opcodes[] = {
+    ARM::MVE_VORRIZ0v4i32,  ARM::MVE_VORRIZ8v4i32,
+    ARM::MVE_VORRIZ16v4i32, ARM::MVE_VORRIZ24v4i32,
+    ARM::MVE_VORRIZ0v8i16,  ARM::MVE_VORRIZ8v8i16,
+};
+
+void ARMDAGToDAGISel::SelectMVE_VBIC_VORR_imm(SDNode *N,
+                                              const uint16_t *Opcodes) {
+  SDLoc Loc(N);
+
+  // The VBICIMM or VORRIMM SDNode will have encoded the constant in
+  // NEON form, which we unpack to select between MVE opcodes.
+  //
+  // We expect its low 8 bits to be the literal 8-bit value that will
+  // be shifted to form the true constant, and the higher bits to
+  // specify the NEON control bits. In our case those bits will be
+  // 0,2,4,6,8,10 for the six entries in our Opcodes array.
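+  //
+  // (Worked example: a 32-bit lane constant of 0x6400 arrives here as
+  // NeonConst = 0x264, i.e. literal byte 0x64 with control bits 2, so
+  // OpcodeIndex is 1 and we pick the "shifted left by 8 bits" opcode
+  // for v4i32.)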
+  unsigned NeonConst = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+  unsigned ShiftedValue = NeonConst & 0xFF;
+  unsigned ControlBits = NeonConst >> 8;
+  assert(ControlBits % 2 == 0 &&
+         "unexpected NEON constant encoding for MVE VBIC/VORR");
+  unsigned OpcodeIndex = ControlBits >> 1;
+  assert(OpcodeIndex < 6 &&
+         "unexpected NEON constant encoding for MVE VBIC/VORR");
+  uint16_t Opcode = Opcodes[OpcodeIndex];
+
+  // The actual constant operand will have the same value that would
+  // be shown in assembly. So we shift it left by the right number of
+  // bytes, which is 0,1,2,3 for the first four opcode types (with a
+  // 32-bit word size) and then 0,1 again for the other two (16-bit).
+  unsigned ShiftBytes = OpcodeIndex & 3;
+  uint32_t MveConst = ShiftedValue << (8 * ShiftBytes);
+
+  SmallVector Ops;
+  Ops.push_back(N->getOperand(0));
+  Ops.push_back(CurDAG->getTargetConstant(MveConst, Loc, MVT::i32));
+  // Even in its unpredicated form, the MVE instruction carries the
+  // standard 'no predicate' operands, so add an empty predicate here.
+  AddEmptyMVEPredicateToOps(Ops, Loc);
+  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
+}
+
+void ARMDAGToDAGISel::SelectMVE_VBIC_VORR_imm_predicated(
+    SDNode *N, const uint16_t *Opcodes) {
+  SDLoc Loc(N);
+
+  uint32_t MveConst = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+
+  unsigned ElementBits;
+  switch (N->getValueType(0).getSimpleVT().SimpleTy) {
+  case MVT::v8i16:
+    ElementBits = 16;
+    break;
+  case MVT::v4i32:
+    ElementBits = 32;
+    break;
+  default:
+    llvm_unreachable("bad vector type in SelectMVE_VBIC_VORR_imm_predicated");
+  }
+
+  // Find the byte position of the single non-zero byte: the constant
+  // must be an 8-bit value shifted left by a whole number of bytes.
+  unsigned Shift, ShiftedValue;
+  for (Shift = 0; Shift < ElementBits; Shift += 8) {
+    ShiftedValue = MveConst >> Shift;
+    if (ShiftedValue < 256 && MveConst == ShiftedValue << Shift)
+      break;
+  }
+  assert(Shift < ElementBits &&
+         "bad constant in SelectMVE_VBIC_VORR_imm_predicated");
+
+  unsigned ShiftBytes = Shift / 8;
+  unsigned OpcodeIndex = (ElementBits == 16 ? 4 : 0) + ShiftBytes;
+  uint16_t Opcode = Opcodes[OpcodeIndex];
+
+  SmallVector Ops;
+  Ops.push_back(N->getOperand(1));
+  Ops.push_back(CurDAG->getTargetConstant(MveConst, Loc, MVT::i32));
+  AddMVEPredicateToOps(Ops, Loc, N->getOperand(3));
+  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
+}
+
 static bool SDValueToConstBool(SDValue SDVal) {
   assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
   ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
@@ -4573,6 +4672,14 @@
                       OpcodesS, OpcodesU);
       return;
     }
+
+    case Intrinsic::arm_mve_bic_imm_predicated:
+      SelectMVE_VBIC_VORR_imm_predicated(N, MVE_VBIC_Opcodes);
+      return;
+    case Intrinsic::arm_mve_orr_imm_predicated:
+      SelectMVE_VBIC_VORR_imm_predicated(N, MVE_VORR_Opcodes);
+      return;
+
     }
     break;
   }
@@ -4580,6 +4687,26 @@
   case ISD::ATOMIC_CMP_SWAP:
     SelectCMP_SWAP(N);
     return;
+
+  case ARMISD::VBICIMM:
+    if (Subtarget->hasMVEIntegerOps()) {
+      // The MVE version of this instruction is divided into
+      // sub-opcodes in a way that makes it tricky to select using
+      // Tablegen patterns, so we use custom C++.
+      SelectMVE_VBIC_VORR_imm(N, MVE_VBIC_Opcodes);
+      return;
+    }
+    // On NEON, Tablegen can handle the job.
+    break;
+
+  case ARMISD::VORRIMM:
+    // As with VBICIMM, we have to do this by hand on MVE, and leave
+    // it to the automation otherwise.
+ if (Subtarget->hasMVEIntegerOps()) { + SelectMVE_VBIC_VORR_imm(N, MVE_VORR_Opcodes); + return; + } + break; } SelectCode(N); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -12176,7 +12176,7 @@ APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; - if (BVN && Subtarget->hasNEON() && + if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) && BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { if (SplatBitSize <= 64) { EVT VbicVT; @@ -12483,7 +12483,7 @@ APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; - if (BVN && Subtarget->hasNEON() && + if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) && BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { if (SplatBitSize <= 64) { EVT VorrVT; diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -274,6 +274,10 @@ def ARMvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>; def ARMvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>; +def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>]>; +def ARMvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>; +def ARMvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>; def SDTARMVSHIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>; diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -2265,6 +2265,15 @@ def : Pat<(v4f32 (ARMvmovFPImm timm:$simm)), (v4f32 (MVE_VMOVimmf32 nImmVMOVF32:$simm))>; + + def : Pat<(v8i16 (vselect (v8i1 VCCR:$pred), (ARMvmvnImm timm:$simm), + MQPR:$inactive)), + (v8i16 (MVE_VMVNimmi16 nImmSplatI16:$simm, + ARMVCCThen, VCCR:$pred, MQPR:$inactive))>; + def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred), (ARMvmvnImm timm:$simm), + MQPR:$inactive)), + (v4i32 (MVE_VMVNimmi32 nImmSplatI32:$simm, + ARMVCCThen, VCCR:$pred, MQPR:$inactive))>; } class MVE_VMINMAXA size, diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -509,11 +509,6 @@ def NEONvsliImm : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>; def NEONvsriImm : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>; -def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, - SDTCisVT<2, i32>]>; -def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>; -def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>; - def NEONvbsl : SDNode<"ARMISD::VBSL", SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, @@ -5296,7 +5291,7 @@ IIC_VMOVImm, "vorr", "i16", "$Vd, $SIMM", "$src = $Vd", [(set DPR:$Vd, - (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> { + (v4i16 (ARMvorrImm DPR:$src, timm:$SIMM)))]> { let Inst{9} = SIMM{9}; } @@ -5305,7 +5300,7 @@ IIC_VMOVImm, "vorr", "i32", "$Vd, $SIMM", "$src = $Vd", [(set DPR:$Vd, - (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> { + (v2i32 (ARMvorrImm DPR:$src, timm:$SIMM)))]> { let Inst{10-9} = SIMM{10-9}; } @@ -5314,7 +5309,7 @@ IIC_VMOVImm, "vorr", "i16", "$Vd, $SIMM", "$src = $Vd", [(set QPR:$Vd, - (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> { + (v8i16 (ARMvorrImm QPR:$src, timm:$SIMM)))]> { let Inst{9} = SIMM{9}; } @@ -5323,7 +5318,7 
@@ IIC_VMOVImm, "vorr", "i32", "$Vd, $SIMM", "$src = $Vd", [(set QPR:$Vd, - (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> { + (v4i32 (ARMvorrImm QPR:$src, timm:$SIMM)))]> { let Inst{10-9} = SIMM{10-9}; } @@ -5347,7 +5342,7 @@ IIC_VMOVImm, "vbic", "i16", "$Vd, $SIMM", "$src = $Vd", [(set DPR:$Vd, - (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> { + (v4i16 (ARMvbicImm DPR:$src, timm:$SIMM)))]> { let Inst{9} = SIMM{9}; } @@ -5356,7 +5351,7 @@ IIC_VMOVImm, "vbic", "i32", "$Vd, $SIMM", "$src = $Vd", [(set DPR:$Vd, - (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> { + (v2i32 (ARMvbicImm DPR:$src, timm:$SIMM)))]> { let Inst{10-9} = SIMM{10-9}; } @@ -5365,7 +5360,7 @@ IIC_VMOVImm, "vbic", "i16", "$Vd, $SIMM", "$src = $Vd", [(set QPR:$Vd, - (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> { + (v8i16 (ARMvbicImm QPR:$src, timm:$SIMM)))]> { let Inst{9} = SIMM{9}; } @@ -5374,7 +5369,7 @@ IIC_VMOVImm, "vbic", "i32", "$Vd, $SIMM", "$src = $Vd", [(set QPR:$Vd, - (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> { + (v4i32 (ARMvbicImm QPR:$src, timm:$SIMM)))]> { let Inst{10-9} = SIMM{10-9}; } diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll b/llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll --- a/llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll @@ -588,7 +588,7 @@ ; CHECK-NEXT: vmov.16 q0[5], r2 ; CHECK-NEXT: vmov.16 q0[6], r1 ; CHECK-NEXT: vmov.16 q0[7], r4 -; CHECK-NEXT: vmovlb.u8 q0, q0 +; CHECK-NEXT: vbic.i16 q0, #0xff00 ; CHECK-NEXT: pop {r4, r5, r7, pc} entry: %offs = load <8 x i8*>, <8 x i8*>* %offptr, align 4 diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/bitwise-imm.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/bitwise-imm.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/bitwise-imm.ll @@ -0,0 +1,343 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <8 x i16> @test_vbicq_n_u16_sh0(<8 x i16> %a) { +; CHECK-LABEL: test_vbicq_n_u16_sh0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vbic.i16 q0, #0x64 +; CHECK-NEXT: bx lr +entry: + %0 = and <8 x i16> %a, + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vbicq_n_u16_sh8(<8 x i16> %a) { +; CHECK-LABEL: test_vbicq_n_u16_sh8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vbic.i16 q0, #0x6400 +; CHECK-NEXT: bx lr +entry: + %0 = and <8 x i16> %a, + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vbicq_n_u32_sh0(<4 x i32> %a) { +; CHECK-LABEL: test_vbicq_n_u32_sh0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vbic.i32 q0, #0x64 +; CHECK-NEXT: bx lr +entry: + %0 = and <4 x i32> %a, + ret <4 x i32> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vbicq_n_u32_sh8(<4 x i32> %a) { +; CHECK-LABEL: test_vbicq_n_u32_sh8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vbic.i32 q0, #0x6400 +; CHECK-NEXT: bx lr +entry: + %0 = and <4 x i32> %a, + ret <4 x i32> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vbicq_n_u32_sh16(<4 x i32> %a) { +; CHECK-LABEL: test_vbicq_n_u32_sh16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vbic.i32 q0, #0x640000 +; CHECK-NEXT: bx lr +entry: + %0 = and <4 x i32> %a, + ret <4 x i32> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vbicq_n_u32_sh24(<4 x i32> %a) { +; CHECK-LABEL: test_vbicq_n_u32_sh24: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vbic.i32 q0, #0x64000000 +; CHECK-NEXT: bx lr +entry: + %0 = and <4 x i32> %a, + ret <4 x i32> %0 +} + +define arm_aapcs_vfpcc <8 x i16> 
@test_vorrq_n_u16_sh0(<8 x i16> %a) { +; CHECK-LABEL: test_vorrq_n_u16_sh0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vorr.i16 q0, #0x64 +; CHECK-NEXT: bx lr +entry: + %0 = or <8 x i16> %a, + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vorrq_n_u16_sh8(<8 x i16> %a) { +; CHECK-LABEL: test_vorrq_n_u16_sh8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vorr.i16 q0, #0x6400 +; CHECK-NEXT: bx lr +entry: + %0 = or <8 x i16> %a, + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vorrq_n_u32_sh0(<4 x i32> %a) { +; CHECK-LABEL: test_vorrq_n_u32_sh0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vorr.i32 q0, #0x64 +; CHECK-NEXT: bx lr +entry: + %0 = or <4 x i32> %a, + ret <4 x i32> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vorrq_n_u32_sh8(<4 x i32> %a) { +; CHECK-LABEL: test_vorrq_n_u32_sh8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vorr.i32 q0, #0x6400 +; CHECK-NEXT: bx lr +entry: + %0 = or <4 x i32> %a, + ret <4 x i32> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vorrq_n_u32_sh16(<4 x i32> %a) { +; CHECK-LABEL: test_vorrq_n_u32_sh16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vorr.i32 q0, #0x640000 +; CHECK-NEXT: bx lr +entry: + %0 = or <4 x i32> %a, + ret <4 x i32> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vorrq_n_u32_sh24(<4 x i32> %a) { +; CHECK-LABEL: test_vorrq_n_u32_sh24: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vorr.i32 q0, #0x64000000 +; CHECK-NEXT: bx lr +entry: + %0 = or <4 x i32> %a, + ret <4 x i32> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vbicq_m_n_u16_sh0(<8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vbicq_m_n_u16_sh0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vbict.i16 q0, #0x64 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.bic.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 100, <8 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vbicq_m_n_u16_sh8(<8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vbicq_m_n_u16_sh8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vbict.i16 q0, #0x6400 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.bic.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 25600, <8 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vbicq_m_n_u32_sh0(<4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vbicq_m_n_u32_sh0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vbict.i32 q0, #0x64 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.bic.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 100, <4 x i1> %1) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vbicq_m_n_u32_sh8(<4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vbicq_m_n_u32_sh8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vbict.i32 q0, #0x6400 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.bic.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 25600, <4 x i1> %1) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vbicq_m_n_u32_sh16(<4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: 
test_vbicq_m_n_u32_sh16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vbict.i32 q0, #0x640000 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.bic.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 6553600, <4 x i1> %1) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vbicq_m_n_u32_sh24(<4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vbicq_m_n_u32_sh24: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vbict.i32 q0, #0x64000000 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.bic.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 1677721600, <4 x i1> %1) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vorrq_m_n_u16_sh0(<8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vorrq_m_n_u16_sh0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vorrt.i16 q0, #0x64 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.orr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 100, <8 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vorrq_m_n_u16_sh8(<8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vorrq_m_n_u16_sh8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vorrt.i16 q0, #0x6400 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.orr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 25600, <8 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vorrq_m_n_u32_sh0(<4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vorrq_m_n_u32_sh0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vorrt.i32 q0, #0x64 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.orr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 100, <4 x i1> %1) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vorrq_m_n_u32_sh8(<4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vorrq_m_n_u32_sh8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vorrt.i32 q0, #0x6400 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.orr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 25600, <4 x i1> %1) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vorrq_m_n_u32_sh16(<4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vorrq_m_n_u32_sh16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vorrt.i32 q0, #0x640000 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.orr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 6553600, <4 x i1> %1) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vorrq_m_n_u32_sh24(<4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vorrq_m_n_u32_sh24: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: 
vpst +; CHECK-NEXT: vorrt.i32 q0, #0x64000000 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.orr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 1677721600, <4 x i1> %1) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vmvnq_n_u16() { +; CHECK-LABEL: test_vmvnq_n_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmvn.i16 q0, #0xaa00 +; CHECK-NEXT: bx lr +entry: + ret <8 x i16> +} + +define arm_aapcs_vfpcc <4 x i32> @test_vmvnq_n_u32() { +; CHECK-LABEL: test_vmvnq_n_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmvn.i32 q0, #0xaa00 +; CHECK-NEXT: bx lr +entry: + ret <4 x i32> +} + +define arm_aapcs_vfpcc <8 x i16> @test_vmvnq_m_n_u16(<8 x i16> %inactive, i16 zeroext %p) { +; CHECK-LABEL: test_vmvnq_m_n_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmvnt.i16 q0, #0xaa00 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = select <8 x i1> %1, <8 x i16> , <8 x i16> %inactive + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vmvnq_m_n_u32(<4 x i32> %inactive, i16 zeroext %p) { +; CHECK-LABEL: test_vmvnq_m_n_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmvnt.i32 q0, #0xaa00 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = select <4 x i1> %1, <4 x i32> , <4 x i32> %inactive + ret <4 x i32> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) +declare <8 x i16> @llvm.arm.mve.bic.imm.predicated.v8i16.v8i1(<8 x i16>, i32, <8 x i1>) +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) +declare <4 x i32> @llvm.arm.mve.bic.imm.predicated.v4i32.v4i1(<4 x i32>, i32, <4 x i1>) +declare <8 x i16> @llvm.arm.mve.orr.imm.predicated.v8i16.v8i1(<8 x i16>, i32, <8 x i1>) +declare <4 x i32> @llvm.arm.mve.orr.imm.predicated.v4i32.v4i1(<4 x i32>, i32, <4 x i1>) diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-load.ll b/llvm/test/CodeGen/Thumb2/mve-masked-load.ll --- a/llvm/test/CodeGen/Thumb2/mve-masked-load.ll +++ b/llvm/test/CodeGen/Thumb2/mve-masked-load.ll @@ -998,21 +998,21 @@ define arm_aapcs_vfpcc <8 x i16> @zext8_masked_v8i16_align1_other(<8 x i8> *%dest, <8 x i8> %a) { ; CHECK-LE-LABEL: zext8_masked_v8i16_align1_other: ; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: vmovlb.u8 q1, q0 -; CHECK-LE-NEXT: vmovlb.s8 q0, q0 -; CHECK-LE-NEXT: vpt.s16 gt, q0, zr -; CHECK-LE-NEXT: vldrbt.u16 q0, [r0] -; CHECK-LE-NEXT: vpsel q0, q0, q1 +; CHECK-LE-NEXT: vmovlb.s8 q1, q0 +; CHECK-LE-NEXT: vpt.s16 gt, q1, zr +; CHECK-LE-NEXT: vldrbt.u16 q1, [r0] +; CHECK-LE-NEXT: vbic.i16 q0, #0xff00 +; CHECK-LE-NEXT: vpsel q0, q1, q0 ; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: zext8_masked_v8i16_align1_other: ; CHECK-BE: @ %bb.0: @ %entry ; CHECK-BE-NEXT: vrev64.16 q1, q0 -; CHECK-BE-NEXT: vmovlb.u8 q0, q1 -; CHECK-BE-NEXT: vmovlb.s8 q1, q1 -; CHECK-BE-NEXT: vpt.s16 gt, q1, zr -; CHECK-BE-NEXT: vldrbt.u16 q1, [r0] -; CHECK-BE-NEXT: vpsel q1, q1, q0 +; CHECK-BE-NEXT: vmovlb.s8 q0, q1 +; CHECK-BE-NEXT: vpt.s16 gt, q0, zr +; CHECK-BE-NEXT: vldrbt.u16 q0, [r0] +; CHECK-BE-NEXT: vbic.i16 q1, #0xff00 +; CHECK-BE-NEXT: vpsel q1, q0, q1 ; CHECK-BE-NEXT: vrev64.16 q0, q1 ; CHECK-BE-NEXT: bx lr entry: diff --git a/llvm/test/CodeGen/Thumb2/mve-sext.ll b/llvm/test/CodeGen/Thumb2/mve-sext.ll --- a/llvm/test/CodeGen/Thumb2/mve-sext.ll +++ 
b/llvm/test/CodeGen/Thumb2/mve-sext.ll @@ -277,7 +277,7 @@ define arm_aapcs_vfpcc <8 x i16> @zext_v8i8_v8i16(<8 x i8> %src) { ; CHECK-LABEL: zext_v8i8_v8i16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmovlb.u8 q0, q0 +; CHECK-NEXT: vbic.i16 q0, #0xff00 ; CHECK-NEXT: bx lr entry: %0 = zext <8 x i8> %src to <8 x i16> @@ -308,41 +308,41 @@ define arm_aapcs_vfpcc <16 x i16> @zext_v16i8_v16i16(<16 x i8> %src) { ; CHECK-LABEL: zext_v16i8_v16i16: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov q2, q0 ; CHECK-NEXT: vmov.u8 r0, q0[0] +; CHECK-NEXT: vmov.16 q0[0], r0 +; CHECK-NEXT: vmov.u8 r0, q2[1] +; CHECK-NEXT: vmov.16 q0[1], r0 +; CHECK-NEXT: vmov.u8 r0, q2[2] +; CHECK-NEXT: vmov.16 q0[2], r0 +; CHECK-NEXT: vmov.u8 r0, q2[3] +; CHECK-NEXT: vmov.16 q0[3], r0 +; CHECK-NEXT: vmov.u8 r0, q2[4] +; CHECK-NEXT: vmov.16 q0[4], r0 +; CHECK-NEXT: vmov.u8 r0, q2[5] +; CHECK-NEXT: vmov.16 q0[5], r0 +; CHECK-NEXT: vmov.u8 r0, q2[6] +; CHECK-NEXT: vmov.16 q0[6], r0 +; CHECK-NEXT: vmov.u8 r0, q2[7] +; CHECK-NEXT: vmov.16 q0[7], r0 +; CHECK-NEXT: vmov.u8 r0, q2[8] ; CHECK-NEXT: vmov.16 q1[0], r0 -; CHECK-NEXT: vmov.u8 r0, q0[1] -; CHECK-NEXT: vmov.16 q1[1], r0 -; CHECK-NEXT: vmov.u8 r0, q0[2] -; CHECK-NEXT: vmov.16 q1[2], r0 -; CHECK-NEXT: vmov.u8 r0, q0[3] -; CHECK-NEXT: vmov.16 q1[3], r0 -; CHECK-NEXT: vmov.u8 r0, q0[4] -; CHECK-NEXT: vmov.16 q1[4], r0 -; CHECK-NEXT: vmov.u8 r0, q0[5] -; CHECK-NEXT: vmov.16 q1[5], r0 -; CHECK-NEXT: vmov.u8 r0, q0[6] -; CHECK-NEXT: vmov.16 q1[6], r0 -; CHECK-NEXT: vmov.u8 r0, q0[7] -; CHECK-NEXT: vmov.16 q1[7], r0 -; CHECK-NEXT: vmov.u8 r0, q0[8] -; CHECK-NEXT: vmovlb.u8 q2, q1 -; CHECK-NEXT: vmov.16 q1[0], r0 -; CHECK-NEXT: vmov.u8 r0, q0[9] +; CHECK-NEXT: vmov.u8 r0, q2[9] ; CHECK-NEXT: vmov.16 q1[1], r0 -; CHECK-NEXT: vmov.u8 r0, q0[10] +; CHECK-NEXT: vmov.u8 r0, q2[10] ; CHECK-NEXT: vmov.16 q1[2], r0 -; CHECK-NEXT: vmov.u8 r0, q0[11] +; CHECK-NEXT: vmov.u8 r0, q2[11] ; CHECK-NEXT: vmov.16 q1[3], r0 -; CHECK-NEXT: vmov.u8 r0, q0[12] +; CHECK-NEXT: vmov.u8 r0, q2[12] ; CHECK-NEXT: vmov.16 q1[4], r0 -; CHECK-NEXT: vmov.u8 r0, q0[13] +; CHECK-NEXT: vmov.u8 r0, q2[13] ; CHECK-NEXT: vmov.16 q1[5], r0 -; CHECK-NEXT: vmov.u8 r0, q0[14] +; CHECK-NEXT: vmov.u8 r0, q2[14] ; CHECK-NEXT: vmov.16 q1[6], r0 -; CHECK-NEXT: vmov.u8 r0, q0[15] +; CHECK-NEXT: vmov.u8 r0, q2[15] ; CHECK-NEXT: vmov.16 q1[7], r0 -; CHECK-NEXT: vmov q0, q2 -; CHECK-NEXT: vmovlb.u8 q1, q1 +; CHECK-NEXT: vbic.i16 q0, #0xff00 +; CHECK-NEXT: vbic.i16 q1, #0xff00 ; CHECK-NEXT: bx lr entry: %0 = zext <16 x i8> %src to <16 x i16> diff --git a/llvm/test/CodeGen/Thumb2/mve-shuffleext.ll b/llvm/test/CodeGen/Thumb2/mve-shuffleext.ll --- a/llvm/test/CodeGen/Thumb2/mve-shuffleext.ll +++ b/llvm/test/CodeGen/Thumb2/mve-shuffleext.ll @@ -73,7 +73,7 @@ define arm_aapcs_vfpcc <8 x i16> @zext_02468101214(<16 x i8> %src) { ; CHECK-LABEL: zext_02468101214: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmovlb.u8 q0, q0 +; CHECK-NEXT: vbic.i16 q0, #0xff00 ; CHECK-NEXT: bx lr entry: %strided.vec = shufflevector <16 x i8> %src, <16 x i8> undef, <8 x i32> @@ -85,7 +85,7 @@ ; CHECK-LABEL: zext_13579111315: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vrev16.8 q0, q0 -; CHECK-NEXT: vmovlb.u8 q0, q0 +; CHECK-NEXT: vbic.i16 q0, #0xff00 ; CHECK-NEXT: bx lr entry: %strided.vec = shufflevector <16 x i8> %src, <16 x i8> undef, <8 x i32>
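The churn in mve-gather-ptrs.ll, mve-masked-load.ll, mve-sext.ll and mve-shuffleext.ll above is all the same effect: once the lowering changes allow MVE to form VBICIMM/VORRIMM, a zero-extend from i8 to i16 lanes, which lowers to an AND with a splat of 0x00FF, is now selected as vbic.i16 q0, #0xff00 instead of vmovlb.u8. A scalar sketch of why the two are equivalent, per 16-bit lane (illustrative helper, not part of the patch):

  #include <cstdint>

  // Clearing the top byte of a 16-bit lane whose low byte holds an i8
  // value is exactly zero-extension of that i8, which is what
  // "vbic.i16 q0, #0xff00" does across the whole vector.
  static inline uint16_t zext_i8_lane(uint16_t lane) {
    return (uint16_t)(lane & ~0xFF00u); // same result as (uint16_t)(uint8_t)lane
  }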