Index: llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3508,6 +3508,30 @@
       return II;
     break;
   }
+  case Intrinsic::arm_mve_vctp8:
+  case Intrinsic::arm_mve_vctp16:
+  case Intrinsic::arm_mve_vctp32:
+  case Intrinsic::arm_mve_vctp64: {
+    Value *DataArg = II->getArgOperand(0);
+    if (auto *C = dyn_cast<ConstantInt>(DataArg)) {
+      unsigned Lanes = cast<VectorType>(II->getType())->getNumElements();
+      uint64_t Limit = C->getZExtValue();
+      // vctp64 is currently modelled as returning a v4i1, not a v2i1. Make
+      // sure we get the limit right in that case and set all relevant lanes.
+      if (II->getIntrinsicID() == Intrinsic::arm_mve_vctp64)
+        Limit *= 2;
+
+      SmallVector<Constant *, 16> NCs;
+      for (unsigned i = 0; i < Lanes; i++) {
+        if (i < Limit)
+          NCs.push_back(ConstantInt::get(Builder.getInt1Ty(), 1));
+        else
+          NCs.push_back(ConstantInt::get(Builder.getInt1Ty(), 0));
+      }
+      return replaceInstUsesWith(*II, ConstantVector::get(NCs));
+    }
+    break;
+  }
   case Intrinsic::amdgcn_rcp: {
     Value *Src = II->getArgOperand(0);
Index: llvm/test/Transforms/InstCombine/ARM/mve-vctp.ll
===================================================================
--- llvm/test/Transforms/InstCombine/ARM/mve-vctp.ll
+++ llvm/test/Transforms/InstCombine/ARM/mve-vctp.ll
@@ -6,8 +6,7 @@
 define <16 x i1> @vctp8_0() {
 ; CHECK-LABEL: @vctp8_0(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <16 x i1> @llvm.arm.mve.vctp8(i32 0)
-; CHECK-NEXT:    ret <16 x i1> [[INT]]
+; CHECK-NEXT:    ret <16 x i1> zeroinitializer
 ;
 entry:
   %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 0)
@@ -17,8 +16,7 @@
 define <16 x i1> @vctp8_1() {
 ; CHECK-LABEL: @vctp8_1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <16 x i1> @llvm.arm.mve.vctp8(i32 1)
-; CHECK-NEXT:    ret <16 x i1> [[INT]]
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
 ;
 entry:
   %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 1)
@@ -28,8 +26,7 @@
 define <16 x i1> @vctp8_8() {
 ; CHECK-LABEL: @vctp8_8(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <16 x i1> @llvm.arm.mve.vctp8(i32 8)
-; CHECK-NEXT:    ret <16 x i1> [[INT]]
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
 ;
 entry:
   %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 8)
@@ -39,8 +36,7 @@
 define <16 x i1> @vctp8_15() {
 ; CHECK-LABEL: @vctp8_15(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <16 x i1> @llvm.arm.mve.vctp8(i32 15)
-; CHECK-NEXT:    ret <16 x i1> [[INT]]
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false>
 ;
 entry:
   %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 15)
@@ -50,8 +46,7 @@
 define <16 x i1> @vctp8_16() {
 ; CHECK-LABEL: @vctp8_16(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <16 x i1> @llvm.arm.mve.vctp8(i32 16)
-; CHECK-NEXT:    ret <16 x i1> [[INT]]
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
 ;
 entry:
   %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 16)
@@ -61,8 +56,7 @@
 define <16 x i1> @vctp8_100() {
 ; CHECK-LABEL: @vctp8_100(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <16 x i1> @llvm.arm.mve.vctp8(i32 100)
-; CHECK-NEXT:    ret <16 x i1> [[INT]]
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
 ;
 entry:
   %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 100)
@@ -72,8 +66,7 @@
 define <16 x i1> @vctp8_m1() {
 ; CHECK-LABEL: @vctp8_m1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <16 x i1> @llvm.arm.mve.vctp8(i32 -1)
-; CHECK-NEXT:    ret <16 x i1> [[INT]]
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
 ;
 entry:
   %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 -1)
@@ -85,8 +78,7 @@
 define <8 x i1> @vctp16_0() {
 ; CHECK-LABEL: @vctp16_0(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 0)
-; CHECK-NEXT:    ret <8 x i1> [[INT]]
+; CHECK-NEXT:    ret <8 x i1> zeroinitializer
 ;
 entry:
   %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 0)
@@ -96,8 +88,7 @@
 define <8 x i1> @vctp16_1() {
 ; CHECK-LABEL: @vctp16_1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 1)
-; CHECK-NEXT:    ret <8 x i1> [[INT]]
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
 ;
 entry:
   %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 1)
@@ -107,8 +98,7 @@
 define <8 x i1> @vctp16_4() {
 ; CHECK-LABEL: @vctp16_4(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 4)
-; CHECK-NEXT:    ret <8 x i1> [[INT]]
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>
 ;
 entry:
   %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 4)
@@ -118,8 +108,7 @@
 define <8 x i1> @vctp16_7() {
 ; CHECK-LABEL: @vctp16_7(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 7)
-; CHECK-NEXT:    ret <8 x i1> [[INT]]
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false>
 ;
 entry:
   %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 7)
@@ -129,8 +118,7 @@
 define <8 x i1> @vctp16_8() {
 ; CHECK-LABEL: @vctp16_8(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 8)
-; CHECK-NEXT:    ret <8 x i1> [[INT]]
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
 ;
 entry:
   %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 8)
@@ -140,8 +128,7 @@
 define <8 x i1> @vctp16_100() {
 ; CHECK-LABEL: @vctp16_100(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 100)
-; CHECK-NEXT:    ret <8 x i1> [[INT]]
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
 ;
 entry:
   %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 100)
@@ -151,8 +138,7 @@
 define <8 x i1> @vctp16_m1() {
 ; CHECK-LABEL: @vctp16_m1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 -1)
-; CHECK-NEXT:    ret <8 x i1> [[INT]]
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
 ;
 entry:
   %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 -1)
@@ -164,8 +150,7 @@
 define <4 x i1> @vctp32_0() {
 ; CHECK-LABEL: @vctp32_0(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 0)
-; CHECK-NEXT:    ret <4 x i1> [[INT]]
+; CHECK-NEXT:    ret <4 x i1> zeroinitializer
 ;
 entry:
   %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 0)
@@ -175,8 +160,7 @@
 define <4 x i1> @vctp32_1() {
 ; CHECK-LABEL: @vctp32_1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 1)
-; CHECK-NEXT:    ret <4 x i1> [[INT]]
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 false, i1 false, i1 false>
 ;
 entry:
   %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 1)
@@ -186,8 +170,7 @@
 define <4 x i1> @vctp32_3() {
 ; CHECK-LABEL: @vctp32_3(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 3)
-; CHECK-NEXT:    ret <4 x i1> [[INT]]
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 false>
 ;
 entry:
   %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 3)
@@ -197,8 +180,7 @@
 define <4 x i1> @vctp32_4() {
 ; CHECK-LABEL: @vctp32_4(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 4)
-; CHECK-NEXT:    ret <4 x i1> [[INT]]
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
 ;
 entry:
   %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 4)
@@ -208,8 +190,7 @@
 define <4 x i1> @vctp32_100() {
 ; CHECK-LABEL: @vctp32_100(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 100)
-; CHECK-NEXT:    ret <4 x i1> [[INT]]
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
 ;
 entry:
   %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 100)
@@ -219,8 +200,7 @@
 define <4 x i1> @vctp32_m1() {
 ; CHECK-LABEL: @vctp32_m1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 -1)
-; CHECK-NEXT:    ret <4 x i1> [[INT]]
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
 ;
 entry:
   %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 -1)
@@ -232,8 +212,7 @@
 define <4 x i1> @vctp64_0() {
 ; CHECK-LABEL: @vctp64_0(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <4 x i1> @llvm.arm.mve.vctp64(i32 0)
-; CHECK-NEXT:    ret <4 x i1> [[INT]]
+; CHECK-NEXT:    ret <4 x i1> zeroinitializer
 ;
 entry:
   %int = call <4 x i1> @llvm.arm.mve.vctp64(i32 0)
@@ -243,8 +222,7 @@
 define <4 x i1> @vctp64_1() {
 ; CHECK-LABEL: @vctp64_1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <4 x i1> @llvm.arm.mve.vctp64(i32 1)
-; CHECK-NEXT:    ret <4 x i1> [[INT]]
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 false, i1 false>
 ;
 entry:
   %int = call <4 x i1> @llvm.arm.mve.vctp64(i32 1)
@@ -254,8 +232,7 @@
 define <4 x i1> @vctp64_2() {
 ; CHECK-LABEL: @vctp64_2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <4 x i1> @llvm.arm.mve.vctp64(i32 2)
-; CHECK-NEXT:    ret <4 x i1> [[INT]]
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
 ;
 entry:
   %int = call <4 x i1> @llvm.arm.mve.vctp64(i32 2)
@@ -265,8 +242,7 @@
 define <4 x i1> @vctp64_100() {
 ; CHECK-LABEL: @vctp64_100(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <4 x i1> @llvm.arm.mve.vctp64(i32 100)
-; CHECK-NEXT:    ret <4 x i1> [[INT]]
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
 ;
 entry:
   %int = call <4 x i1> @llvm.arm.mve.vctp64(i32 100)
@@ -276,8 +252,7 @@
 define <4 x i1> @vctp64_m1() {
 ; CHECK-LABEL: @vctp64_m1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INT:%.*]] = call <4 x i1> @llvm.arm.mve.vctp64(i32 -1)
-; CHECK-NEXT:    ret <4 x i1> [[INT]]
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
 ;
 entry:
   %int = call <4 x i1> @llvm.arm.mve.vctp64(i32 -1)
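
Note for reviewers: the lane computation in the InstCombine change above can be modelled with a small standalone C++ sketch. This is illustrative only and not part of the patch; foldVCTP and the printed values are hypothetical names used here to mirror the Lanes/Limit logic, including the vctp64 case where the intrinsic is modelled as returning a v4i1, so the constant element count is doubled before comparing against the lane index.

// Illustrative sketch only (not part of the patch): model which predicate
// lanes a constant-argument VCTP would produce under the fold above.
#include <cstdint>
#include <iostream>
#include <vector>

// Hypothetical helper, not an LLVM API. ElementBits is 8, 16, 32 or 64.
static std::vector<bool> foldVCTP(unsigned ElementBits, uint32_t Count) {
  // A 128-bit MVE predicate has 16/8/4 lanes for vctp8/vctp16/vctp32;
  // vctp64 is modelled as returning a v4i1, not a v2i1.
  unsigned Lanes = ElementBits == 8 ? 16 : ElementBits == 16 ? 8 : 4;
  uint64_t Limit = Count; // zero-extended, matching C->getZExtValue()
  if (ElementBits == 64)
    Limit *= 2; // set both v4i1 lanes covering each 64-bit element
  std::vector<bool> Mask(Lanes);
  for (unsigned i = 0; i < Lanes; i++)
    Mask[i] = i < Limit; // lane is active while below the element count
  return Mask;
}

int main() {
  for (uint32_t Count : {0u, 1u, 3u, 100u}) {
    std::cout << "vctp32(" << Count << "):";
    for (bool B : foldVCTP(32, Count))
      std::cout << ' ' << (B ? "true" : "false");
    std::cout << '\n';
  }
  return 0;
}

For example, foldVCTP(32, 3) yields true, true, true, false, matching the @vctp32_3 check, and foldVCTP(64, 1) yields true, true, false, false, matching @vctp64_1.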