Index: llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3508,6 +3508,30 @@
       return II;
     break;
   }
+  case Intrinsic::arm_mve_vctp8:
+  case Intrinsic::arm_mve_vctp16:
+  case Intrinsic::arm_mve_vctp32:
+  case Intrinsic::arm_mve_vctp64: {
+    Value *DataArg = II->getArgOperand(0);
+    if (auto *C = dyn_cast<ConstantInt>(DataArg)) {
+      unsigned Lanes = cast<VectorType>(II->getType())->getNumElements();
+      uint64_t Limit = C->getZExtValue();
+      // vctp64 is currently modelled as returning a v4i1, not a v2i1. Make
+      // sure we get the limit right in that case and set all relevant lanes.
+      if (II->getIntrinsicID() == Intrinsic::arm_mve_vctp64)
+        Limit *= 2;
+
+      SmallVector<Constant *, 16> NCs;
+      for (unsigned i = 0; i < Lanes; i++) {
+        if (i < Limit)
+          NCs.push_back(ConstantInt::get(Builder.getInt1Ty(), 1));
+        else
+          NCs.push_back(ConstantInt::get(Builder.getInt1Ty(), 0));
+      }
+      return replaceInstUsesWith(*II, ConstantVector::get(NCs));
+    }
+    break;
+  }
   case Intrinsic::amdgcn_rcp: {
     Value *Src = II->getArgOperand(0);
Index: llvm/test/Transforms/InstCombine/ARM/mve-vctp.ll
===================================================================
--- llvm/test/Transforms/InstCombine/ARM/mve-vctp.ll
+++ llvm/test/Transforms/InstCombine/ARM/mve-vctp.ll
@@ -6,8 +6,7 @@
 define <16 x i1> @vctp8_0() {
 ; CHECK-LABEL: @vctp8_0(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <16 x i1> @llvm.arm.mve.vctp8(i32 0)
-; CHECK-NEXT:    ret <16 x i1> [[INT]]
+; CHECK-NEXT:    ret <16 x i1> zeroinitializer
 ;
 entry:
   %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 0)
@@ -17,8 +16,7 @@
 define <16 x i1> @vctp8_1() {
 ; CHECK-LABEL: @vctp8_1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <16 x i1> @llvm.arm.mve.vctp8(i32 1)
-; CHECK-NEXT:    ret <16 x i1> [[INT]]
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
 ;
 entry:
   %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 1)
@@ -28,8 +26,7 @@
 define <16 x i1> @vctp8_8() {
 ; CHECK-LABEL: @vctp8_8(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <16 x i1> @llvm.arm.mve.vctp8(i32 8)
-; CHECK-NEXT:    ret <16 x i1> [[INT]]
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
 ;
 entry:
   %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 8)
@@ -39,8 +36,7 @@
 define <16 x i1> @vctp8_15() {
 ; CHECK-LABEL: @vctp8_15(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <16 x i1> @llvm.arm.mve.vctp8(i32 15)
-; CHECK-NEXT:    ret <16 x i1> [[INT]]
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false>
 ;
 entry:
   %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 15)
@@ -50,8 +46,7 @@
 define <16 x i1> @vctp8_16() {
 ; CHECK-LABEL: @vctp8_16(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <16 x i1> @llvm.arm.mve.vctp8(i32 16)
-; CHECK-NEXT:    ret <16 x i1> [[INT]]
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
 ;
 entry:
   %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 16)
@@ -61,8 +56,7 @@
 define <16 x i1> @vctp8_100() {
 ; CHECK-LABEL: @vctp8_100(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <16 x i1> @llvm.arm.mve.vctp8(i32 100)
-; CHECK-NEXT:    ret <16 x i1> [[INT]]
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
 ;
 entry:
   %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 100)
@@ -72,8 +66,7 @@
 define <16 x i1> @vctp8_m1() {
 ; CHECK-LABEL: @vctp8_m1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <16 x i1> @llvm.arm.mve.vctp8(i32 -1)
-; CHECK-NEXT:    ret <16 x i1> [[INT]]
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
 ;
 entry:
   %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 -1)
@@ -85,8 +78,7 @@
 define <8 x i1> @vctp16_0() {
 ; CHECK-LABEL: @vctp16_0(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 0)
-; CHECK-NEXT:    ret <8 x i1> [[INT]]
+; CHECK-NEXT:    ret <8 x i1> zeroinitializer
 ;
 entry:
   %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 0)
@@ -96,8 +88,7 @@
 define <8 x i1> @vctp16_1() {
 ; CHECK-LABEL: @vctp16_1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 1)
-; CHECK-NEXT:    ret <8 x i1> [[INT]]
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
 ;
 entry:
   %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 1)
@@ -107,8 +98,7 @@
 define <8 x i1> @vctp16_4() {
 ; CHECK-LABEL: @vctp16_4(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 4)
-; CHECK-NEXT:    ret <8 x i1> [[INT]]
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>
 ;
 entry:
   %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 4)
@@ -118,8 +108,7 @@
 define <8 x i1> @vctp16_7() {
 ; CHECK-LABEL: @vctp16_7(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 7)
-; CHECK-NEXT:    ret <8 x i1> [[INT]]
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false>
 ;
 entry:
   %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 7)
@@ -129,8 +118,7 @@
 define <8 x i1> @vctp16_8() {
 ; CHECK-LABEL: @vctp16_8(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 8)
-; CHECK-NEXT:    ret <8 x i1> [[INT]]
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
 ;
 entry:
   %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 8)
@@ -140,8 +128,7 @@
 define <8 x i1> @vctp16_100() {
 ; CHECK-LABEL: @vctp16_100(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 100)
-; CHECK-NEXT:    ret <8 x i1> [[INT]]
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
 ;
 entry:
   %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 100)
@@ -151,8 +138,7 @@
 define <8 x i1> @vctp16_m1() {
 ; CHECK-LABEL: @vctp16_m1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 -1)
-; CHECK-NEXT:    ret <8 x i1> [[INT]]
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
 ;
 entry:
   %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 -1)
@@ -164,8 +150,7 @@
 define <4 x i1> @vctp32_0() {
 ; CHECK-LABEL: @vctp32_0(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 0)
-; CHECK-NEXT:    ret <4 x i1> [[INT]]
+; CHECK-NEXT:    ret <4 x i1> zeroinitializer
 ;
 entry:
   %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 0)
@@ -175,8 +160,7 @@
 define <4 x i1> @vctp32_1() {
 ; CHECK-LABEL: @vctp32_1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 1)
-; CHECK-NEXT:    ret <4 x i1> [[INT]]
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 false, i1 false, i1 false>
 ;
 entry:
   %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 1)
@@ -186,8 +170,7 @@
 define <4 x i1> @vctp32_3() {
 ; CHECK-LABEL: @vctp32_3(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 3)
-; CHECK-NEXT:    ret <4 x i1> [[INT]]
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 false>
 ;
 entry:
   %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 3)
@@ -197,8 +180,7 @@
 define <4 x i1> @vctp32_4() {
 ; CHECK-LABEL: @vctp32_4(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 4)
-; CHECK-NEXT:    ret <4 x i1> [[INT]]
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
 ;
 entry:
   %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 4)
@@ -208,8 +190,7 @@
 define <4 x i1> @vctp32_100() {
 ; CHECK-LABEL: @vctp32_100(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 100)
-; CHECK-NEXT:    ret <4 x i1> [[INT]]
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
 ;
 entry:
   %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 100)
@@ -219,8 +200,7 @@
 define <4 x i1> @vctp32_m1() {
 ; CHECK-LABEL: @vctp32_m1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 -1)
-; CHECK-NEXT:    ret <4 x i1> [[INT]]
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
 ;
 entry:
   %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 -1)
@@ -232,8 +212,7 @@
 define <4 x i1> @vctp64_0() {
 ; CHECK-LABEL: @vctp64_0(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <4 x i1> @llvm.arm.mve.vctp64(i32 0)
-; CHECK-NEXT:    ret <4 x i1> [[INT]]
+; CHECK-NEXT:    ret <4 x i1> zeroinitializer
 ;
 entry:
   %int = call <4 x i1> @llvm.arm.mve.vctp64(i32 0)
@@ -243,8 +222,7 @@
 define <4 x i1> @vctp64_1() {
 ; CHECK-LABEL: @vctp64_1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <4 x i1> @llvm.arm.mve.vctp64(i32 1)
-; CHECK-NEXT:    ret <4 x i1> [[INT]]
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 false, i1 false>
 ;
 entry:
   %int = call <4 x i1> @llvm.arm.mve.vctp64(i32 1)
@@ -254,8 +232,7 @@
 define <4 x i1> @vctp64_2() {
 ; CHECK-LABEL: @vctp64_2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <4 x i1> @llvm.arm.mve.vctp64(i32 2)
-; CHECK-NEXT:    ret <4 x i1> [[INT]]
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
 ;
 entry:
   %int = call <4 x i1> @llvm.arm.mve.vctp64(i32 2)
@@ -265,8 +242,7 @@
 define <4 x i1> @vctp64_100() {
 ; CHECK-LABEL: @vctp64_100(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <4 x i1> @llvm.arm.mve.vctp64(i32 100)
-; CHECK-NEXT:    ret <4 x i1> [[INT]]
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
 ;
 entry:
   %int = call <4 x i1> @llvm.arm.mve.vctp64(i32 100)
@@ -276,8 +252,7 @@
 define <4 x i1> @vctp64_m1() {
 ; CHECK-LABEL: @vctp64_m1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <4 x i1> @llvm.arm.mve.vctp64(i32 -1)
-; CHECK-NEXT:    ret <4 x i1> [[INT]]
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
 ;
 entry:
   %int = call <4 x i1> @llvm.arm.mve.vctp64(i32 -1)
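
Note for reviewers: the lane computation in the InstCombine change above can be modelled with a small standalone C++ sketch. This is illustrative only and not part of the patch; foldVCTP and the printed values are hypothetical names used here to mirror the Lanes/Limit logic, including the vctp64 case where the intrinsic is modelled as returning a v4i1, so the constant element count is doubled before comparing against the lane index.

// Illustrative sketch only (not part of the patch): model which predicate
// lanes a constant-argument VCTP would produce under the fold above.
#include <cstdint>
#include <iostream>
#include <vector>

// Hypothetical helper, not an LLVM API. ElementBits is 8, 16, 32 or 64.
static std::vector<bool> foldVCTP(unsigned ElementBits, uint32_t Count) {
  // A 128-bit MVE predicate has 16/8/4 lanes for vctp8/vctp16/vctp32;
  // vctp64 is modelled as returning a v4i1, not a v2i1.
  unsigned Lanes = ElementBits == 8 ? 16 : ElementBits == 16 ? 8 : 4;
  uint64_t Limit = Count; // zero-extended, matching C->getZExtValue()
  if (ElementBits == 64)
    Limit *= 2; // set both v4i1 lanes covering each 64-bit element
  std::vector<bool> Mask(Lanes);
  for (unsigned i = 0; i < Lanes; i++)
    Mask[i] = i < Limit; // lane is active while below the element count
  return Mask;
}

int main() {
  for (uint32_t Count : {0u, 1u, 3u, 100u}) {
    std::cout << "vctp32(" << Count << "):";
    for (bool B : foldVCTP(32, Count))
      std::cout << ' ' << (B ? "true" : "false");
    std::cout << '\n';
  }
  return 0;
}

For example, foldVCTP(32, 3) yields true, true, true, false, matching the @vctp32_3 check, and foldVCTP(64, 1) yields true, true, false, false, matching @vctp64_1.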